i965/vs: Add support for ir_binop_pow.
author Eric Anholt <eric@anholt.net>
Tue, 9 Aug 2011 19:30:41 +0000 (12:30 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 16 Aug 2011 20:04:43 +0000 (13:04 -0700)
Fixes vs-pow-float-float.

src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index b5f442e..0820215 100644 (file)
@@ -444,12 +444,22 @@ public:
    void generate_vs_instruction(vec4_instruction *inst,
                                struct brw_reg dst,
                                struct brw_reg *src);
+
    void generate_math1_gen4(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src);
    void generate_math1_gen6(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src);
+   void generate_math2_gen4(vec4_instruction *inst,
+                           struct brw_reg dst,
+                           struct brw_reg src0,
+                           struct brw_reg src1);
+   void generate_math2_gen6(vec4_instruction *inst,
+                           struct brw_reg dst,
+                           struct brw_reg src0,
+                           struct brw_reg src1);
+
    void generate_urb_write(vec4_instruction *inst);
    void generate_oword_dual_block_offsets(struct brw_reg m1,
                                          struct brw_reg index);
index effc82a..df9521c 100644 (file)
@@ -245,6 +245,15 @@ vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
            BRW_MATH_PRECISION_FULL);
 }
 
+static void
+check_gen6_math_src_arg(struct brw_reg src)
+{
+   /* Source swizzles are ignored, so insist on unmodified XYZW sources. */
+   assert(!src.abs);
+   assert(!src.negate);
+   assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+}
+
 void
 vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
@@ -252,10 +261,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 {
    /* Can't do writemask because math can't be align16. */
    assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
-   /* Source swizzles are ignored. */
-   assert(!src.abs);
-   assert(!src.negate);
-   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
+   check_gen6_math_src_arg(src);
 
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_math(p,
@@ -270,6 +276,49 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 }
 
 void
+vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
+                                 struct brw_reg dst,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   /* Source swizzles are ignored. */
+   check_gen6_math_src_arg(src0);
+   check_gen6_math_src_arg(src1);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math2(p,
+            dst,
+            brw_math_function(inst->opcode),
+            src0, src1);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
+                                 struct brw_reg dst,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+   /* Two-operand math for the non-gen6 path: src1 is first copied into
+    * message register inst->base_mrf + 1, and inst->base_mrf itself is
+    * handed to brw_math along with src0.  Math can't be align16, so a
+    * partial writemask is not supported.
+    */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+
+   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);
+
+   /* Switch to align1 around the math instruction only. */
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p, dst, brw_math_function(inst->opcode),
+           BRW_MATH_SATURATE_NONE, inst->base_mrf, src0,
+           BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
 vec4_visitor::generate_urb_write(vec4_instruction *inst)
 {
    brw_urb_WRITE(p,
@@ -442,7 +491,11 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
       break;
 
    case SHADER_OPCODE_POW:
-      assert(!"finishme");
+      if (intel->gen >= 6) {
+        generate_math2_gen6(inst, dst, src[0], src[1]);
+      } else {
+        generate_math2_gen4(inst, dst, src[0], src[1]);
+      }
       break;
 
    case VS_OPCODE_URB_WRITE:
index f4756a9..f9447d7 100644 (file)
@@ -188,11 +188,11 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode,
     */
 
    expanded = src_reg(this, glsl_type::vec4_type);
-   emit(BRW_OPCODE_MOV, dst, src0);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
    src0 = expanded;
 
    expanded = src_reg(this, glsl_type::vec4_type);
-   emit(BRW_OPCODE_MOV, dst, src1);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
    src1 = expanded;
 
    if (dst.writemask != WRITEMASK_XYZW) {