i965/fs: Emit MAD instructions when possible.
author Matt Turner <mattst88@gmail.com>
Mon, 27 Oct 2014 05:08:15 +0000 (22:08 -0700)
committer Matt Turner <mattst88@gmail.com>
Wed, 18 Feb 2015 04:44:09 +0000 (20:44 -0800)
Previously we didn't emit MAD instructions since they cannot take
immediate arguments, but with the opt_combine_constants() pass we can
handle this properly.

total instructions in shared programs: 5920017 -> 5733278 (-3.15%)
instructions in affected programs:     3625153 -> 3438414 (-5.15%)
helped:                                22017
HURT:                                  870
GAINED:                                91
LOST:                                  49

Without constant pooling, this patch is a complete loss:

total instructions in shared programs: 5912589 -> 5987888 (1.27%)
instructions in affected programs:     3190050 -> 3265349 (2.36%)
helped:                                1564
HURT:                                  17827
GAINED:                                27
LOST:                                  101

And since the constant pooling patch by itself hurt a bunch of things,
from before constant pooling to this patch the results are:

total instructions in shared programs: 5895414 -> 5747946 (-2.50%)
instructions in affected programs:     3617993 -> 3470525 (-4.08%)
helped:                                20478
HURT:                                  4469
GAINED:                                54
LOST:                                  146

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_fp.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 7f2874d..382a54a 100644 (file)
@@ -316,9 +316,14 @@ fs_visitor::emit_fragment_program_code()
       case OPCODE_MAD:
          for (int i = 0; i < 4; i++) {
             if (fpi->DstReg.WriteMask & (1 << i)) {
-               fs_reg temp = vgrf(glsl_type::float_type);
-               emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
-               emit(ADD(offset(dst, i), temp, offset(src[2], i)));
+               if (brw->gen >= 6) {
+                  emit(MAD(offset(dst, i), offset(src[2], i),
+                           offset(src[1], i), offset(src[0], i)));
+               } else {
+                  fs_reg temp = vgrf(glsl_type::float_type);
+                  emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
+                  emit(ADD(offset(dst, i), temp, offset(src[2], i)));
+               }
             }
          }
          break;
index c7f321f..a760114 100644 (file)
@@ -456,11 +456,6 @@ fs_visitor::try_emit_mad(ir_expression *ir)
          return false;
    }
 
-   if (nonmul->as_constant() ||
-       mul->operands[0]->as_constant() ||
-       mul->operands[1]->as_constant())
-      return false;
-
    nonmul->accept(this);
    fs_reg src0 = this->result;