src/glsl/lower_instructions.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_instructions.cpp
  26  *
  27  * Many GPUs lack native instructions for certain expression operations, and
  28  * must replace them with some other expression tree.  This pass lowers some
  29  * of the most common cases, allowing the lowering code to be implemented once
  30  * rather than in each driver backend.
  31  *
  32  * Currently supported transformations:
  33  * - SUB_TO_ADD_NEG
  34  * - DIV_TO_MUL_RCP
  35  * - INT_DIV_TO_MUL_RCP
  36  * - EXP_TO_EXP2
  37  * - POW_TO_EXP2
  38  * - LOG_TO_LOG2
  39  * - MOD_TO_FRACT
  40  *
  41  * SUB_TO_ADD_NEG:
  42  * ---------------
  43  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
  44  *
  45  * This simplifies expression reassociation, and for many backends
  46  * there is no subtract operation separate from adding the negation.
  47  * For backends with native subtract operations, they will probably
  48  * want to recognize add(op0, neg(op1)) or the other way around to
  49  * produce a subtract anyway.
  50  *
  51  * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
  52  * --------------------------------------
   53  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  54  *
  55  * Many GPUs don't have a divide instruction (945 and 965 included),
  56  * but they do have an RCP instruction to compute an approximate
  57  * reciprocal.  By breaking the operation down, constant reciprocals
  58  * can get constant folded.
  59  *
  60  * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
  61  * handles the integer case, converting to and from floating point so that
  62  * RCP is possible.
  63  *
  64  * EXP_TO_EXP2 and LOG_TO_LOG2:
  65  * ----------------------------
  66  * Many GPUs don't have a base e log or exponent instruction, but they
  67  * do have base 2 versions, so this pass converts exp and log to exp2
  68  * and log2 operations.
  69  *
  70  * POW_TO_EXP2:
  71  * -----------
  72  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  73  * x**y to 2**(y * log2(x)).
  74  *
  75  * MOD_TO_FRACT:
  76  * -------------
   77  * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
  78  *
  79  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  80  * if we have to break it down like this anyway, it gives an
  81  * opportunity to do things like constant fold the (1.0 / op1) easily.
  82  */
  83
  84 #include "main/core.h" /* for M_LOG2E */
  85 #include "glsl_types.h"
  86 #include "ir.h"
  87 #include "ir_optimization.h"
  88
  89 class lower_instructions_visitor : public ir_hierarchical_visitor {
  90 public:
  91    lower_instructions_visitor(unsigned lower)
  92       : progress(false), lower(lower) { }
  93
  94    ir_visitor_status visit_leave(ir_expression *);
  95
  96    bool progress;
  97
  98 private:
  99    unsigned lower; /** Bitfield of which operations to lower */
 100
 101    void sub_to_add_neg(ir_expression *);
 102    void div_to_mul_rcp(ir_expression *);
 103    void int_div_to_mul_rcp(ir_expression *);
 104    void mod_to_fract(ir_expression *);
 105    void exp_to_exp2(ir_expression *);
 106    void pow_to_exp2(ir_expression *);
 107    void log_to_log2(ir_expression *);
 108 };
 109
 110 /**
 111  * Determine if a particular type of lowering should occur
 112  */
 113 #define lowering(x) (this->lower & x)
 114
 115 bool
 116 lower_instructions(exec_list *instructions, unsigned what_to_lower)
 117 {
 118    lower_instructions_visitor v(what_to_lower);
 119
 120    visit_list_elements(&v, instructions);
 121    return v.progress;
 122 }
 123
 124 void
 125 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 126 {
 127    ir->operation = ir_binop_add;
 128    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
 129                                            ir->operands[1], NULL);
 130    this->progress = true;
 131 }
 132
 133 void
 134 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 135 {
 136    assert(ir->operands[1]->type->is_float());
 137
 138    /* New expression for the 1.0 / op1 */
 139    ir_rvalue *expr;
 140    expr = new(ir) ir_expression(ir_unop_rcp,
 141                                 ir->operands[1]->type,
 142                                 ir->operands[1]);
 143
 144    /* op0 / op1 -> op0 * (1.0 / op1) */
 145    ir->operation = ir_binop_mul;
 146    ir->operands[1] = expr;
 147
 148    this->progress = true;
 149 }
 150
 151 void
 152 lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
 153 {
 154    assert(ir->operands[1]->type->is_integer());
 155
 156    /* Be careful with integer division -- we need to do it as a
 157     * float and re-truncate, since rcp(n > 1) of an integer would
 158     * just be 0.
 159     */
 160    ir_rvalue *op0, *op1;
 161    const struct glsl_type *vec_type;
 162
 163    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 164                                       ir->operands[1]->type->vector_elements,
 165                                       ir->operands[1]->type->matrix_columns);
 166
 167    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
 168       op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
 169    else
 170       op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
 171
 172    op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
 173
 174    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 175                                       ir->operands[0]->type->vector_elements,
 176                                       ir->operands[0]->type->matrix_columns);
 177
 178    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
 179       op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
 180    else
 181       op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 182
 183    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 184                                       ir->type->vector_elements,
 185                                       ir->type->matrix_columns);
 186
 187    op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 188
 189    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
 190       ir->operation = ir_unop_f2i;
 191       ir->operands[0] = op0;
 192    } else {
 193       ir->operation = ir_unop_i2u;
 194       ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
 195    }
 196    ir->operands[1] = NULL;
 197
 198    this->progress = true;
 199 }
 200
 201 void
 202 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
 203 {
 204    ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
 205
 206    ir->operation = ir_unop_exp2;
 207    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
 208                                            ir->operands[0], log2_e);
 209    this->progress = true;
 210 }
 211
 212 void
 213 lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
 214 {
 215    ir_expression *const log2_x =
 216       new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 217                             ir->operands[0]);
 218
 219    ir->operation = ir_unop_exp2;
 220    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
 221                                            ir->operands[1], log2_x);
 222    ir->operands[1] = NULL;
 223    this->progress = true;
 224 }
 225
 226 void
 227 lower_instructions_visitor::log_to_log2(ir_expression *ir)
 228 {
 229    ir->operation = ir_binop_mul;
 230    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 231                                            ir->operands[0], NULL);
 232    ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
 233    this->progress = true;
 234 }
 235
 236 void
 237 lower_instructions_visitor::mod_to_fract(ir_expression *ir)
 238 {
 239    ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
 240                                            ir_var_temporary);
 241    this->base_ir->insert_before(temp);
 242
 243    ir_assignment *const assign =
 244       new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
 245                             ir->operands[1], NULL);
 246
 247    this->base_ir->insert_before(assign);
 248
 249    ir_expression *const div_expr =
 250       new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
 251                             ir->operands[0],
 252                             new(ir) ir_dereference_variable(temp));
 253
 254    /* Don't generate new IR that would need to be lowered in an additional
 255     * pass.
 256     */
 257    if (lowering(DIV_TO_MUL_RCP))
 258       div_to_mul_rcp(div_expr);
 259
 260    ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
 261                                            ir->operands[0]->type,
 262                                            div_expr,
 263                                            NULL);
 264
 265    ir->operation = ir_binop_mul;
 266    ir->operands[0] = new(ir) ir_dereference_variable(temp);
 267    ir->operands[1] = expr;
 268    this->progress = true;
 269 }
 270
 271 ir_visitor_status
 272 lower_instructions_visitor::visit_leave(ir_expression *ir)
 273 {
 274    switch (ir->operation) {
 275    case ir_binop_sub:
 276       if (lowering(SUB_TO_ADD_NEG))
 277          sub_to_add_neg(ir);
 278       break;
 279
 280    case ir_binop_div:
 281       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
 282          int_div_to_mul_rcp(ir);
 283       else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
 284          div_to_mul_rcp(ir);
 285       break;
 286
 287    case ir_unop_exp:
 288       if (lowering(EXP_TO_EXP2))
 289          exp_to_exp2(ir);
 290       break;
 291
 292    case ir_unop_log:
 293       if (lowering(LOG_TO_LOG2))
 294          log_to_log2(ir);
 295       break;
 296
 297    case ir_binop_mod:
 298       if (lowering(MOD_TO_FRACT) && ir->type->is_float())
 299          mod_to_fract(ir);
 300       break;
 301
 302    case ir_binop_pow:
 303       if (lowering(POW_TO_EXP2))
 304          pow_to_exp2(ir);
 305       break;
 306
 307    default:
 308       return visit_continue;
 309    }
 310
 311    return visit_continue;
 312 }