intel/compiler: workaround for SIMD8 half-float MAD in gen8

author Iago Toral Quiroga <itoral@igalia.com>

Mon, 21 Jan 2019 11:11:44 +0000 (12:11 +0100)

committer Juan A. Suarez Romero <jasuarez@igalia.com>

Thu, 18 Apr 2019 09:05:18 +0000 (11:05 +0200)
author Iago Toral Quiroga <itoral@igalia.com>
Mon, 21 Jan 2019 11:11:44 +0000 (12:11 +0100)
committer Juan A. Suarez Romero <jasuarez@igalia.com>
Thu, 18 Apr 2019 09:05:18 +0000 (11:05 +0200)
diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp

index c60d470..a76fd26 100644 (file)
--- a/src/intel/compiler/brw_fs_lower_regioning.cpp
+++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
@@ -127,20 +127,37 @@ namespace {
     has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst,
                            unsigned i)
     {
-      if (is_unordered(inst) || inst->is_control_source(i)) {
+      if (is_unordered(inst) || inst->is_control_source(i))
           return false;
-      } else {
-         const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
-         const unsigned src_byte_stride = inst->src[i].stride *
-            type_sz(inst->src[i].type);
-         const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
-         const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE;
  
-         return has_dst_aligned_region_restriction(devinfo, inst) &&
-                !is_uniform(inst->src[i]) &&
-                (src_byte_stride != dst_byte_stride ||
-                 src_byte_offset != dst_byte_offset);
+      /* Empirical testing shows that Broadwell has a bug affecting half-float
+       * MAD instructions when any of its sources has a non-zero offset, such
+       * as:
+       *
+       * mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q };
+       *
+       * We used to generate code like this for SIMD8 executions where we
+       * used to pack components Y and W of a vector at offset 16B of a SIMD
+       * register. The problem doesn't occur if the stride of the source is 0.
+       */
+      if (devinfo->gen == 8 &&
+          inst->opcode == BRW_OPCODE_MAD &&
+          inst->src[i].type == BRW_REGISTER_TYPE_HF &&
+          reg_offset(inst->src[i]) % REG_SIZE > 0 &&
+          inst->src[i].stride != 0) {
+         return true;
        }
+
+      const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
+      const unsigned src_byte_stride = inst->src[i].stride *
+         type_sz(inst->src[i].type);
+      const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
+      const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE;
+
+      return has_dst_aligned_region_restriction(devinfo, inst) &&
+             !is_uniform(inst->src[i]) &&
+             (src_byte_stride != dst_byte_stride ||
+              src_byte_offset != dst_byte_offset);
     }
  
     /*
author	Iago Toral Quiroga <itoral@igalia.com>
	Mon, 21 Jan 2019 11:11:44 +0000 (12:11 +0100)
committer	Juan A. Suarez Romero <jasuarez@igalia.com>
	Thu, 18 Apr 2019 09:05:18 +0000 (11:05 +0200)