From 1cb57ea493d892bf5065e5fb0c5dd745744cc71c Mon Sep 17 00:00:00 2001
From: Chris Forbes <chrisf@ijw.co.nz>
Date: Sun, 9 Dec 2012 22:03:49 +1300
Subject: [PATCH] i965/vs: Fix gen6+ math operand quirks in one place

This causes immediate operands to be moved to a temporary on gen7. An
upcoming change needs that, and the visitor did not do it until now.

v2: Drop gen > 7 checks (no such gen exists) and style-fix comments (changes
    by anholt).

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 61 ++++++++++++--------------
 2 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 92d7bfd..d7c1cce 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -426,6 +426,7 @@ public:
    void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
    void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
    void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+   src_reg fix_math_operand(src_reg src);
 
    void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 544974a..97593d0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -220,21 +220,33 @@ vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements
    emit(dot_opcodes[elements - 2], dst, src0, src1);
 }
 
-void
-vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+src_reg
+vec4_visitor::fix_math_operand(src_reg src)
 {
    /* The gen6 math instruction ignores the source modifiers --
     * swizzle, abs, negate, and at least some parts of the register
     * region description.
     *
-    * While it would seem that this MOV could be avoided at this point
-    * in the case that the swizzle is matched up with the destination
-    * writemask, note that uniform packing and register allocation
-    * could rearrange our swizzle, so let's leave this matter up to
-    * copy propagation later.
+    * Rather than trying to enumerate all these cases, *always* expand the
+    * operand to a temp GRF for gen6.
+    *
+    * For gen7, keep the operand as-is, except if immediate, which gen7 still
+    * can't use.
     */
-   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
-   emit(MOV(dst_reg(temp_src), src));
+
+   if (intel->gen == 7 && src.file != IMM)
+      return src;
+
+   dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
+   expanded.type = src.type;
+   emit(MOV(expanded, src));
+   return src_reg(expanded);
+}
+
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   src = fix_math_operand(src);
 
    if (dst.writemask != WRITEMASK_XYZW) {
       /* The gen6 math instruction must be align1, so we can't do
@@ -242,11 +254,11 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
        */
       dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
 
-      emit(opcode, temp_dst, temp_src);
+      emit(opcode, temp_dst, src);
 
       emit(MOV(dst, src_reg(temp_dst)));
    } else {
-      emit(opcode, dst, temp_src);
+      emit(opcode, dst, src);
    }
 }
 
@@ -275,9 +287,7 @@ vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
       return;
    }
 
-   if (intel->gen >= 7) {
-      emit(opcode, dst, src);
-   } else if (intel->gen == 6) {
+   if (intel->gen >= 6) {
       return emit_math1_gen6(opcode, dst, src);
    } else {
       return emit_math1_gen4(opcode, dst, src);
@@ -288,23 +298,8 @@ void
 vec4_visitor::emit_math2_gen6(enum opcode opcode,
 			      dst_reg dst, src_reg src0, src_reg src1)
 {
-   src_reg expanded;
-
-   /* The gen6 math instruction ignores the source modifiers --
-    * swizzle, abs, negate, and at least some parts of the register
-    * region description.  Move the sources to temporaries to make it
-    * generally work.
-    */
-
-   expanded = src_reg(this, glsl_type::vec4_type);
-   expanded.type = src0.type;
-   emit(MOV(dst_reg(expanded), src0));
-   src0 = expanded;
-
-   expanded = src_reg(this, glsl_type::vec4_type);
-   expanded.type = src1.type;
-   emit(MOV(dst_reg(expanded), src1));
-   src1 = expanded;
+   src0 = fix_math_operand(src0);
+   src1 = fix_math_operand(src1);
 
    if (dst.writemask != WRITEMASK_XYZW) {
       /* The gen6 math instruction must be align1, so we can't do
@@ -344,9 +339,7 @@ vec4_visitor::emit_math(enum opcode opcode,
       return;
    }
 
-   if (intel->gen >= 7) {
-      emit(opcode, dst, src0, src1);
-   } else if (intel->gen == 6) {
+   if (intel->gen >= 6) {
       return emit_math2_gen6(opcode, dst, src0, src1);
    } else {
       return emit_math2_gen4(opcode, dst, src0, src1);
-- 
2.7.4