2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file brw_wm_channel_expressions.cpp
27 * Breaks vector operations down into operations on each component.
29 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
30 * channel of a vector is laid out as 1 or 2 8-float registers. Each
31 * ALU operation operates on one of those channel registers. As a
32 * result, there is no value to the 965 fragment shader in tracking
33 * "vector" expressions in the sense of GLSL fragment shaders, when
34 * doing a channel at a time may help in constant folding, algebraic
35 * simplification, and reducing the liveness of channel registers.
37 * The exception to the desire to break everything down to floats is
38 * texturing. The texture sampler returns a writemasked
39 * 4/8-register sequence containing the texture values. We don't want
40 * to dispatch to the sampler separately for each channel we need, so
41 * we do retain the vector types in that case.
45 #include "main/core.h"
48 #include "../glsl/ir.h"
49 #include "../glsl/ir_expression_flattening.h"
50 #include "../glsl/glsl_types.h"
// Hierarchical IR visitor that drives this pass. Members visible here: a
// constructor that clears the progress flag, a visit_leave() hook for
// assignments, and the get_element() and assign() helpers.
52 class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
54 ir_channel_expressions_visitor()
// Nothing has been rewritten yet; visit_leave() sets this flag to true once it
// has broken an expression down.
56 this->progress = false;
60 ir_visitor_status visit_leave(ir_assignment *);
// Returns an rvalue for one component of a variable.
62 ir_rvalue *get_element(ir_variable *var, unsigned int element);
// Emits a single-channel assignment of val ahead of the assignment ir.
63 void assign(ir_assignment *ir, int elem, ir_rvalue *val);
// Predicate passed to do_expression_flattening() by brw_do_channel_expressions().
// It views the instruction as an expression and walks its operands, testing
// whether any of them has a vector type.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably true as soon as a vector operand is found -- confirm.
70 channel_expressions_predicate(ir_instruction *ir)
72 ir_expression *expr = ir->as_expression();
78 for (i = 0; i < expr->get_num_operands(); i++) {
79 if (expr->operands[i]->type->is_vector())
// Entry point of the pass. First runs expression flattening with
// channel_expressions_predicate as the selector, then walks the instruction
// list with an ir_channel_expressions_visitor.
87 brw_do_channel_expressions(exec_list *instructions)
89 ir_channel_expressions_visitor v;
// NOTE(review): the comment below talks about matrix expressions, but the
// predicate passed to the flattening step tests for vector operands; the
// wording looks stale -- confirm and update.
91 /* Pull out any matrix expression to a separate assignment to a
92 * temp. This will make our handling of the breakdown to
93 * operations on the matrix's vector components much easier.
95 do_expression_flattening(instructions, channel_expressions_predicate);
97 visit_list_elements(&v, instructions);
// Builds an rvalue that reads component elem of var.
//   var  - variable to read from.
//   elem - component index; asserted to be below the component count of the
//          variable type on the non-scalar path.
// A scalar variable is returned as a plain variable dereference and elem is
// not consulted. Otherwise the result is a one-component swizzle of a fresh
// dereference that selects elem. All nodes are allocated from mem_ctx.
103 ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
105 ir_dereference *deref;
// Scalars have nothing to select from, so hand back the variable itself.
107 if (var->type->is_scalar())
108 return new(mem_ctx) ir_dereference_variable(var);
110 assert(elem < var->type->components());
111 deref = new(mem_ctx) ir_dereference_variable(var);
// Swizzle with a component count of 1: only the first selector (elem) is used.
112 return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
// Emits one per-channel assignment in front of the original assignment.
//   ir   - the original assignment; its left-hand side is cloned as the
//          destination and the new instruction is inserted before it.
//   elem - channel index; the new assignment uses write mask (1 << elem).
//   val  - value to store into that channel.
// The original assignment is asserted to carry a full write mask for the
// component count of its left-hand side.
116 ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
// Clone the destination so the new assignment does not share the node with ir.
118 ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
119 ir_assignment *assign;
121 /* This assign-of-expression should have been generated by the
122 * expression flattening visitor (since we never short circit to
123 * not flatten, even for plain assignments of variables), so the
124 * writemask is always full.
126 assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
// NOTE(review): the NULL third argument is presumably the (absent) condition
// of the new assignment -- confirm against the ir_assignment constructor.
128 assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
129 ir->insert_before(assign);
// Visitor hook for assignments. When the right-hand side is an expression with
// at least one vector operand, the operands are first copied into temporaries
// named channel_expressions, and the operation is then rebuilt channel by
// channel through get_element() and assign(). Sets progress to true after a
// rewrite and always returns visit_continue.
// NOTE(review): many lines of this function are not visible in this excerpt
// (guards, case labels, breaks, the tail of multi-line calls); the comments
// below describe only what the visible lines show.
133 ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
135 ir_expression *expr = ir->rhs->as_expression();
136 bool found_vector = false;
137 unsigned int i, vector_elements = 1;
138 ir_variable *op_var[2];
// NOTE(review): the guard for this early return is not visible; presumably it
// is taken when the right-hand side is not an expression -- confirm.
141 return visit_continue;
// New IR nodes are allocated from the ralloc context that owns ir.
144 this->mem_ctx = ralloc_parent(ir);
// Look for a vector operand and remember its element count.
146 for (i = 0; i < expr->get_num_operands(); i++) {
147 if (expr->operands[i]->type->is_vector()) {
149 vector_elements = expr->operands[i]->type->vector_elements;
// NOTE(review): presumably the exit taken when no vector operand was found.
154 return visit_continue;
156 /* Store the expression operands in temps so we can use them
159 for (i = 0; i < expr->get_num_operands(); i++) {
160 ir_assignment *assign;
161 ir_dereference *deref;
163 assert(!expr->operands[i]->type->is_matrix());
165 op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
166 "channel_expressions",
168 ir->insert_before(op_var[i]);
170 deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
171 assign = new(mem_ctx) ir_assignment(deref,
174 ir->insert_before(assign);
177 const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
180 /* OK, time to break down this vector operation. */
181 switch (expr->operation) {
// Unary operations handled per channel: one expression per element of
// operand 0, each stored through assign().
182 case ir_unop_bit_not:
183 case ir_unop_logic_not:
205 case ir_unop_round_even:
208 case ir_unop_sin_reduced:
209 case ir_unop_cos_reduced:
212 for (i = 0; i < vector_elements; i++) {
213 ir_rvalue *op0 = get_element(op_var[0], i);
215 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
// Binary operations handled per channel: element i of both operands feeds the
// same operation, and the result goes to channel i.
230 case ir_binop_lshift:
231 case ir_binop_rshift:
232 case ir_binop_bit_and:
233 case ir_binop_bit_xor:
234 case ir_binop_bit_or:
236 case ir_binop_greater:
237 case ir_binop_lequal:
238 case ir_binop_gequal:
240 case ir_binop_nequal:
241 for (i = 0; i < vector_elements; i++) {
242 ir_rvalue *op0 = get_element(op_var[0], i);
243 ir_rvalue *op1 = get_element(op_var[1], i);
245 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
// Reduction over operand 0: starts with logic_or of elements 0 and 1, then
// folds in each remaining element with another logic_or.
254 temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
256 get_element(op_var[0], 0),
257 get_element(op_var[0], 1));
259 for (i = 2; i < vector_elements; i++) {
260 temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
262 get_element(op_var[0], i),
// Reduction over both operands: a multiply is built for each pair of elements
// and the products are chained together with adds.
270 ir_expression *last = NULL;
271 for (i = 0; i < vector_elements; i++) {
272 ir_rvalue *op0 = get_element(op_var[0], i);
273 ir_rvalue *op1 = get_element(op_var[1], i);
276 temp = new(mem_ctx) ir_expression(ir_binop_mul,
281 last = new(mem_ctx) ir_expression(ir_binop_add,
// These logic operations only take scalar operands, so reaching them with a
// vector operand trips the assert below.
293 case ir_binop_logic_and:
294 case ir_binop_logic_xor:
295 case ir_binop_logic_or:
298 assert(!"not reached: expression operates on scalars only");
// Whole-vector comparisons: the operation is applied to each pair of elements
// and the partial results are joined with logic_and for all_equal and with
// logic_or otherwise.
300 case ir_binop_all_equal:
301 case ir_binop_any_nequal: {
302 ir_expression *last = NULL;
303 for (i = 0; i < vector_elements; i++) {
304 ir_rvalue *op0 = get_element(op_var[0], i);
305 ir_rvalue *op1 = get_element(op_var[1], i);
307 ir_expression_operation join;
309 if (expr->operation == ir_binop_all_equal)
310 join = ir_binop_logic_and;
312 join = ir_binop_logic_or;
314 temp = new(mem_ctx) ir_expression(expr->operation,
319 last = new(mem_ctx) ir_expression(join,
331 assert(!"noise should have been broken down to function call");
333 case ir_quadop_vector:
334 assert(!"should have been lowered");
// Record that the instruction stream was changed by this visit.
339 this->progress = true;
341 return visit_continue;