From f0c04e6c22babf2aee2ad1ee85dbd6f996be3712 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 2 Sep 2011 15:18:29 -0700
Subject: [PATCH] i965/vs: Add support for simple algebraic optimizations.

We generate silly code for array access, and it's easier to generally
support the cleanup than to specifically avoid the bad code in each
place we might generate it.

Removes 4.6% of instructions from 41.6% of shaders in shader-db,
particularly savage2/hon and unigine.

v2: Fixes by Ken: Make is_zero/one member functions, and fix a progress flag.

Reviewed-by: Kenneth Graunke
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp      | 92 +++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h        |  3 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |  1 +
 3 files changed, 96 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 436de2f..1f2cebe 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -306,6 +306,98 @@ vec4_visitor::pack_uniform_registers()
    }
 }
 
+bool
+src_reg::is_zero() const   /* True only for an immediate equal to 0. */
+{
+   if (file != IMM)   /* Non-immediate sources never match. */
+      return false;
+
+   if (type == BRW_REGISTER_TYPE_F) {
+      return imm.f == 0.0;   /* Float compare; -0.0 also compares equal. */
+   } else {
+      return imm.i == 0;   /* Every non-float type is tested via imm.i. */
+   }
+}
+
+bool
+src_reg::is_one() const   /* True only for an immediate equal to 1. */
+{
+   if (file != IMM)   /* Non-immediate sources never match. */
+      return false;
+
+   if (type == BRW_REGISTER_TYPE_F) {
+      return imm.f == 1.0;   /* Float immediates are tested via imm.f. */
+   } else {
+      return imm.i == 1;   /* Every non-float type is tested via imm.i. */
+   }
+}
+
+/**
+ * Does algebraic optimizations (a * 0 = 0, a * 1 = a, a + 0 = a).
+ *
+ * Only an immediate 0 or 1 in src[1] is matched; the instruction is then
+ * rewritten in place as a MOV.  Returns true if anything was changed.
+ * GLSL IR also does this optimization, but we still see such code because
+ * (1) we emit silly instructions ourselves, e.g. array access generates
+ * "ADD offset, index, base" even if base is 0, and (2) GLSL IR's constant
+ * propagation doesn't track the components of aggregates, so some VS
+ * patterns (initialize matrix to 0, accumulate in vertex blending
+ * factors) end up breaking down to instructions involving 0.
+ */
+bool
+vec4_visitor::opt_algebraic()
+{
+   bool progress = false;   /* Set once any instruction is rewritten. */
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_ADD:
+         if (inst->src[1].is_zero()) {   /* a + 0  =>  MOV a */
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();   /* Default-constructed src_reg. */
+            progress = true;
+         }
+         break;
+
+      case BRW_OPCODE_MUL:
+         if (inst->src[1].is_zero()) {   /* a * 0  =>  MOV 0 */
+            inst->opcode = BRW_OPCODE_MOV;
+            switch (inst->src[0].type) {   /* Zero typed like src[0]. */
+            case BRW_REGISTER_TYPE_F:
+               inst->src[0] = src_reg(0.0f);
+               break;
+            case BRW_REGISTER_TYPE_D:
+               inst->src[0] = src_reg(0);
+               break;
+            case BRW_REGISTER_TYPE_UD:
+               inst->src[0] = src_reg(0u);
+               break;
+            default:
+               assert(!"not reached");   /* Unexpected source type. */
+               inst->src[0] = src_reg(0.0f);   /* Fall back to float 0. */
+               break;
+            }
+            inst->src[1] = src_reg();   /* Default-constructed src_reg. */
+            progress = true;
+         } else if (inst->src[1].is_one()) {   /* a * 1  =>  MOV a */
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();   /* Default-constructed src_reg. */
+            progress = true;
+         }
+         break;
+      default:
+         break;   /* All other opcodes are left untouched. */
+      }
+   }
+
+   if (progress)   /* Instructions changed: flag live intervals as stale. */
+      this->live_intervals_valid = false;
+
+   return progress;
+}
+
 /**
  * Only a limited number of hardware registers may be used for push
  * constants, so this turns access to the overflowed constants into
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7739a15..058615f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,8 @@ public:
    }
 
    bool equals(src_reg *r);
+   bool is_zero() const;   /* Immediate with value 0? */
+   bool is_one() const;   /* Immediate with value 1? */
 
    src_reg(class vec4_visitor *v, const struct glsl_type *type);
 
@@ -401,6 +403,7 @@ public:
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
    bool opt_copy_propagation();
+   bool opt_algebraic();   /* Returns true if any instruction changed. */
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c40c41f..7031d2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -615,6 +615,7 @@ vec4_visitor::run()
       progress = false;
       progress = dead_code_eliminate() || progress;
       progress = opt_copy_propagation() || progress;
+      progress = opt_algebraic() || progress;   /* Call is on the left, so it always runs. */
    } while (progress);
 
 
-- 
2.7.4