i965/vec4: Add and use byte-MOV instruction for unpack 4x8.
authorMatt Turner <mattst88@gmail.com>
Thu, 12 Feb 2015 01:42:43 +0000 (01:42 +0000)
committerMatt Turner <mattst88@gmail.com>
Fri, 20 Feb 2015 05:16:44 +0000 (21:16 -0800)
Previously we were using a B/UB source in an Align16 instruction, which
is illegal. It for some reason works on all platforms, except Broadwell.

Cc: "10.5" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86811
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index a597d6b..17c27dd 100644 (file)
@@ -911,6 +911,7 @@ enum opcode {
 
    SHADER_OPCODE_URB_WRITE_SIMD8,
 
+   VEC4_OPCODE_MOV_BYTES,
    VEC4_OPCODE_PACK_BYTES,
    VEC4_OPCODE_UNPACK_UNIFORM,
 
index b0e9c82..fbb20bc 100644 (file)
@@ -471,6 +471,8 @@ brw_instruction_name(enum opcode op)
    case SHADER_OPCODE_URB_WRITE_SIMD8:
       return "gen8_urb_write_simd8";
 
+   case VEC4_OPCODE_MOV_BYTES:
+      return "mov_bytes";
    case VEC4_OPCODE_PACK_BYTES:
       return "pack_bytes";
    case VEC4_OPCODE_UNPACK_UNIFORM:
index 3d03b60..67c8285 100644 (file)
@@ -1512,6 +1512,22 @@ vec4_generator::generate_code(const cfg_t *cfg)
          generate_unpack_flags(dst);
          break;
 
+      case VEC4_OPCODE_MOV_BYTES: {
+         /* Moves the low byte from each channel, using an Align1 access mode
+          * and a <4,1,0> source region.
+          */
+         assert(src[0].type == BRW_REGISTER_TYPE_UB ||
+                src[0].type == BRW_REGISTER_TYPE_B);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+         src[0].vstride = BRW_VERTICAL_STRIDE_4;
+         src[0].width = BRW_WIDTH_1;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_0;
+         brw_MOV(p, dst, src[0]);
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
       case VEC4_OPCODE_PACK_BYTES: {
          /* Is effectively:
           *
index be071d7..f6f589d 100644 (file)
@@ -489,7 +489,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
 
    shifted.type = BRW_REGISTER_TYPE_UB;
    dst_reg f(this, glsl_type::vec4_type);
-   emit(MOV(f, src_reg(shifted)));
+   emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
 
    emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
 }
@@ -511,7 +511,7 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
 
    shifted.type = BRW_REGISTER_TYPE_B;
    dst_reg f(this, glsl_type::vec4_type);
-   emit(MOV(f, src_reg(shifted)));
+   emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
 
    dst_reg scaled(this, glsl_type::vec4_type);
    emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));