2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "main/macros.h"
// Construct a source register that reads back what the destination register
// reg writes. File, register offset and type are copied; the swizzle is
// derived from the writemask of reg.
29 src_reg::src_reg(dst_reg reg)
// Mirror the storage location and data type of the destination.
33 this->file = reg.file;
35 this->reg_offset = reg.reg_offset;
36 this->type = reg.type;
// Scan the four channels. The test below selects channels that are absent
// from the writemask (the statement it guards is not visible in this
// extract; presumably those channels are skipped). Each recorded channel
// index is appended to swizzles and remembered in last.
42 for (int i = 0; i < 4; i++) {
43 if (!(reg.writemask & (1 << i)))
46 swizzles[next_chan++] = last = i;
// Fill any remaining swizzle slots with the last recorded channel.
49 for (; next_chan < 4; next_chan++) {
50 swizzles[next_chan] = last;
// Pack the four selectors with BRW_SWIZZLE4.
53 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
54 swizzles[2], swizzles[3]);
// Construct a destination register that targets the same storage as the
// source register reg: file, register offset and type are copied, and the
// writemask is set to WRITEMASK_XYZW. The swizzle of reg is not consulted
// in the visible lines.
57 dst_reg::dst_reg(src_reg reg)
61 this->file = reg.file;
63 this->reg_offset = reg.reg_offset;
64 this->type = reg.type;
65 this->writemask = WRITEMASK_XYZW;
// Core emit overload taking a destination and three sources. Allocates a
// vec4_instruction from mem_ctx, fills it in and appends it to the
// instruction list of this visitor. The lines between the opcode store and
// the ir store (presumably recording dst and the sources) are not visible
// in this extract.
69 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
70 src_reg src0, src_reg src1, src_reg src2)
72 vec4_instruction *inst = new(mem_ctx) vec4_instruction();
74 inst->opcode = opcode;
// Remember which IR node and annotation string were current when the
// instruction was emitted.
79 inst->ir = this->base_ir;
80 inst->annotation = this->current_annotation;
// Append at the tail of this->instructions.
82 this->instructions.push_tail(inst);
// Two-source convenience overload: forwards to the three-source emit with a
// default-constructed src_reg as the third source and returns its result.
89 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
91 return emit(opcode, dst, src0, src1, src_reg());
// One-source convenience overload. Asserts that dst enables at least one
// channel, then forwards to the three-source emit with default-constructed
// src_reg values for the unused sources.
95 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
97 assert(dst.writemask != 0);
98 return emit(opcode, dst, src0, src_reg(), src_reg());
// Operand-less convenience overload: forwards to the three-source emit with
// a default-constructed destination and sources.
102 vec4_visitor::emit(enum opcode opcode)
104 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
// Emit a dot product of src0 and src1 into dst. The opcode comes from a
// table holding DP2, DP3 and DP4 indexed by elements - 2, so elements is
// expected to be 2, 3 or 4; no range check is visible here.
108 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
110 static enum opcode dot_opcodes[] = {
111 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
114 emit(dot_opcodes[elements - 2], dst, src0, src1);
// Emit a one-source math instruction on gen6. A MOV is emitted first, then
// the math opcode itself.
118 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
120 /* The gen6 math instruction ignores the source modifiers --
121 * swizzle, abs, negate, and at least some parts of the register
122 * region description. Move the source to the corresponding slots
123 * of the destination generally work.
// A float-typed temporary is allocated below.
// NOTE(review): the MOV writes to dst, not to the temporary, and no use of
// the temporary appears in the visible lines. Lines between the MOV and the
// final emit are missing from this extract, so confirm against the full
// file whether src is redirected to the temporary.
125 src_reg expanded = src_reg(this, glsl_type::float_type);
126 emit(BRW_OPCODE_MOV, dst, src);
129 emit(opcode, dst, src);
// Emit a one-source math instruction for hardware before gen6. The emitted
// instruction is captured in inst; what is done with it afterwards is not
// visible in this extract.
133 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
135 vec4_instruction *inst = emit(opcode, dst, src);
// Emit a one-source math instruction, dispatching on hardware generation.
141 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
// Opcodes listed as case labels here. The assert that follows them reports
// a bad math opcode; the label it sits under is not visible in this extract
// (presumably the default path).
144 case SHADER_OPCODE_RCP:
145 case SHADER_OPCODE_RSQ:
146 case SHADER_OPCODE_SQRT:
147 case SHADER_OPCODE_EXP2:
148 case SHADER_OPCODE_LOG2:
149 case SHADER_OPCODE_SIN:
150 case SHADER_OPCODE_COS:
153 assert(!"not reached: bad math opcode");
// gen6 and later go through emit_math1_gen6; the other return is on the
// remaining path (its else is not visible here).
157 if (intel->gen >= 6) {
158 return emit_math1_gen6(opcode, dst, src);
160 return emit_math1_gen4(opcode, dst, src);
// Emit a two-source math instruction on gen6. Two MOVs are emitted first,
// then the math opcode itself.
165 vec4_visitor::emit_math2_gen6(enum opcode opcode,
166 dst_reg dst, src_reg src0, src_reg src1)
170 /* The gen6 math instruction ignores the source modifiers --
171 * swizzle, abs, negate, and at least some parts of the register
172 * region description. Move the sources to temporaries to make it
// A vec4-typed temporary is allocated for each source.
// NOTE(review): both MOVs below write to dst rather than to the temporary
// just allocated, so the second MOV overwrites the first. Lines following
// each MOV are missing from this extract; confirm against the full file
// whether src0 and src1 are redirected to the temporaries.
176 expanded = src_reg(this, glsl_type::vec4_type);
177 emit(BRW_OPCODE_MOV, dst, src0);
180 expanded = src_reg(this, glsl_type::vec4_type);
181 emit(BRW_OPCODE_MOV, dst, src1);
184 emit(opcode, dst, src0, src1);
// Emit a two-source math instruction for hardware before gen6. The emitted
// instruction is captured in inst; what is done with it afterwards is not
// visible in this extract.
188 vec4_visitor::emit_math2_gen4(enum opcode opcode,
189 dst_reg dst, src_reg src0, src_reg src1)
191 vec4_instruction *inst = emit(opcode, dst, src0, src1);
// Emit a two-source math instruction, dispatching on hardware generation.
// SHADER_OPCODE_POW is the only opcode accepted, enforced by the assert.
197 vec4_visitor::emit_math(enum opcode opcode,
198 dst_reg dst, src_reg src0, src_reg src1)
200 assert(opcode == SHADER_OPCODE_POW);
// gen6 and later go through emit_math2_gen6; the other return is on the
// remaining path (its else is not visible here).
202 if (intel->gen >= 6) {
203 return emit_math2_gen6(opcode, dst, src0, src1);
205 return emit_math2_gen4(opcode, dst, src0, src1);
// Walk every entry of list in order, treating each as an ir_instruction.
// What is done with each instruction is on lines not visible in this
// extract.
210 vec4_visitor::visit_instructions(const exec_list *list)
212 foreach_iter(exec_list_iterator, iter, *list) {
213 ir_instruction *ir = (ir_instruction *)iter.get();
// Compute the storage size of a GLSL type. Judging by the comments inside
// the body the unit is vec4 slots; the return type, some case labels and
// several return statements are not visible in this extract.
222 type_size(const struct glsl_type *type)
227 switch (type->base_type) {
230 case GLSL_TYPE_FLOAT:
// A matrix yields its column count.
232 if (type->is_matrix()) {
233 return type->matrix_columns;
235 /* Regardless of size of vector, it gets a vec4. This is bad
236 * packing for things like floats, but otherwise arrays become a
237 * mess. Hopefully a later pass over the code can pack scalars
238 * down if appropriate.
// An array is its element size times its length; the length must be
// positive.
242 case GLSL_TYPE_ARRAY:
243 assert(type->length > 0);
244 return type_size(type->fields.array) * type->length;
// A struct accumulates the sizes of its fields into size.
245 case GLSL_TYPE_STRUCT:
247 for (i = 0; i < type->length; i++) {
248 size += type_size(type->fields.structure[i].type);
251 case GLSL_TYPE_SAMPLER:
252 /* Samplers take up one slot in UNIFORMS[], but they're baked in
// Allocate a new virtual GRF of the given size and return its index.
263 vec4_visitor::virtual_grf_alloc(int size)
// Grow the size table when the count has reached its capacity: capacity
// starts at 16 and is otherwise doubled (the else separating the two
// assignments is not visible in this extract), then the table is resized
// with reralloc under mem_ctx.
265 if (virtual_grf_array_size <= virtual_grf_count) {
266 if (virtual_grf_array_size == 0)
267 virtual_grf_array_size = 16;
269 virtual_grf_array_size *= 2;
270 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
271 virtual_grf_array_size);
// Record the size under the next free index and hand that index out,
// bumping the count afterwards.
273 virtual_grf_sizes[virtual_grf_count] = size;
274 return virtual_grf_count++;
// Construct a source register backed by a newly allocated virtual GRF of
// visitor v, sized by type_size(type).
277 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
282 this->reg = v->virtual_grf_alloc(type_size(type));
// Arrays and records get BRW_SWIZZLE_NOOP; other types get the swizzle that
// swizzle_for_size returns for their vector width. The else between the two
// assignments is not visible in this extract.
284 if (type->is_array() || type->is_record()) {
285 this->swizzle = BRW_SWIZZLE_NOOP;
287 this->swizzle = swizzle_for_size(type->vector_elements);
290 this->type = brw_type_for_base_type(type);
// Construct a destination register backed by a newly allocated virtual GRF
// of visitor v, sized by type_size(type).
293 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
298 this->reg = v->virtual_grf_alloc(type_size(type));
// Arrays and records enable all four channels; other types enable the low
// vector_elements bits of the writemask. The else between the two
// assignments is not visible in this extract.
300 if (type->is_array() || type->is_record()) {
301 this->writemask = WRITEMASK_XYZW;
303 this->writemask = (1 << type->vector_elements) - 1;
306 this->type = brw_type_for_base_type(type);
// Look up the dst_reg recorded for var in variable_ht. The result of
// hash_table_find is returned as is, cast to dst_reg pointer.
310 vec4_visitor::variable_storage(ir_variable *var)
312 return (dst_reg *)hash_table_find(this->variable_ht, var);
// Emit an instruction whose conditional modifier reflects the truth value
// of the boolean rvalue ir. When ir is an expression the comparison or
// logic operation is folded into that instruction; the instructions at the
// end of the function test this->result instead. Branch structure between
// the two parts is not fully visible in this extract.
316 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
318 ir_expression *expr = ir->as_expression();
322 vec4_instruction *inst;
// Evaluate up to two scalar operands into op[].
324 assert(expr->get_num_operands() <= 2);
325 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
326 assert(expr->operands[i]->type->is_scalar());
328 expr->operands[i]->accept(this);
329 op[i] = this->result;
332 switch (expr->operation) {
// Logical not: AND with 1 into the null register and test for zero.
333 case ir_unop_logic_not:
334 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
335 inst->conditional_mod = BRW_CONDITIONAL_Z;
// Binary logic operations: perform the operation into the null register and
// test for non-zero.
338 case ir_binop_logic_xor:
339 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
340 inst->conditional_mod = BRW_CONDITIONAL_NZ;
343 case ir_binop_logic_or:
344 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
345 inst->conditional_mod = BRW_CONDITIONAL_NZ;
348 case ir_binop_logic_and:
349 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
350 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// The case labels for the next two groups are not visible in this extract.
// Both test op[0] for non-zero, the first against a float zero and the
// second against an integer zero. On gen6 and later a CMP is emitted; the
// MOV form is on the other path.
354 if (intel->gen >= 6) {
355 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
357 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
359 inst->conditional_mod = BRW_CONDITIONAL_NZ;
363 if (intel->gen >= 6) {
364 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
366 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
368 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// Comparisons: one CMP carrying the conditional modifier that
// brw_conditional_for_comparison returns for the operation. Some comparison
// labels are missing from this extract.
371 case ir_binop_greater:
372 case ir_binop_gequal:
374 case ir_binop_lequal:
376 case ir_binop_all_equal:
377 case ir_binop_nequal:
378 case ir_binop_any_nequal:
379 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
380 inst->conditional_mod =
381 brw_conditional_for_comparison(expr->operation);
385 assert(!"not reached");
// Test this->result for non-zero: gen6 and later AND it with 1, the other
// path uses a MOV. The line that produces this->result for this part is not
// visible in this extract.
393 if (intel->gen >= 6) {
394 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
395 this->result, src_reg(1));
396 inst->conditional_mod = BRW_CONDITIONAL_NZ;
398 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
399 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// Emits BRW_OPCODE_IF for ir with the condition test carried by the IF
// instruction itself, via its sources and conditional modifier. When the
// condition is an expression its operation selects the form of the IF; the
// instructions at the end evaluate the condition and compare this->result
// against zero. Branch structure between the two parts is not fully visible
// in this extract.
404 * Emit a gen6 IF statement with the comparison folded into the IF
408 vec4_visitor::emit_if_gen6(ir_if *ir)
410 ir_expression *expr = ir->condition->as_expression();
414 vec4_instruction *inst;
// Evaluate up to two scalar operands into op[].
417 assert(expr->get_num_operands() <= 2);
418 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
419 assert(expr->operands[i]->type->is_scalar());
421 expr->operands[i]->accept(this);
422 op[i] = this->result;
425 switch (expr->operation) {
// Logical not: IF compares op[0] against zero with the Z modifier.
426 case ir_unop_logic_not:
427 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
428 inst->conditional_mod = BRW_CONDITIONAL_Z;
// Logical xor: IF compares the two operands directly with NZ.
431 case ir_binop_logic_xor:
432 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
433 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// Logical or and and: compute the result into a bool temporary first, then
// IF compares that temporary against zero with NZ.
436 case ir_binop_logic_or:
437 temp = dst_reg(this, glsl_type::bool_type);
438 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
439 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
440 inst->conditional_mod = BRW_CONDITIONAL_NZ;
443 case ir_binop_logic_and:
444 temp = dst_reg(this, glsl_type::bool_type);
445 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
446 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
447 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// The case labels for the next two emits are not visible in this extract.
// Both compare op[0] against zero with NZ; the first uses the float null
// destination and the second the integer one.
451 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
452 inst->conditional_mod = BRW_CONDITIONAL_NZ;
456 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
457 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// Comparisons: IF takes both operands and the conditional modifier that
// brw_conditional_for_comparison returns for the operation. Some comparison
// labels are missing from this extract.
460 case ir_binop_greater:
461 case ir_binop_gequal:
463 case ir_binop_lequal:
465 case ir_binop_all_equal:
466 case ir_binop_nequal:
467 case ir_binop_any_nequal:
468 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
469 inst->conditional_mod =
470 brw_conditional_for_comparison(expr->operation);
// After the assert an IF on op[0] against zero is still emitted, which
// takes effect in builds where assert compiles to nothing.
473 assert(!"not reached");
474 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
475 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// Evaluate the whole condition and compare its result against zero.
481 ir->condition->accept(this);
483 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
484 this->result, src_reg(0));
485 inst->conditional_mod = BRW_CONDITIONAL_NZ;
// Visitor for variable declarations: creates a dst_reg for the variable and
// records it in variable_ht keyed by the ir_variable.
489 vec4_visitor::visit(ir_variable *ir)
// Storage already recorded for this variable. The guarded statement is not
// visible in this extract (presumably an early return).
493 if (variable_storage(ir))
// The case labels for the next two groups are not visible in this extract.
// First group: the variable maps to an ATTR register at ir->location, typed
// from the variable type.
498 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
499 reg->type = brw_type_for_base_type(ir->type);
500 hash_table_insert(this->variable_ht, reg, ir);
// Second group: the variable gets a new virtual register, and one entry of
// output_reg per unit of type_size, starting at ir->location, is set to a
// copy of that register with reg_offset equal to the slot number.
504 reg = new(mem_ctx) dst_reg(this, ir->type);
505 hash_table_insert(this->variable_ht, reg, ir);
507 for (int i = 0; i < type_size(ir->type); i++) {
508 output_reg[ir->location + i] = *reg;
509 output_reg[ir->location + i].reg_offset = i;
// Temporaries get a new virtual register.
513 case ir_var_temporary:
514 reg = new(mem_ctx) dst_reg(this, ir->type);
515 hash_table_insert(this->variable_ht, reg, ir);
520 /* FINISHME: uniforms */
// Visitor for loops. Builds helper IR for the counter initialization, the
// bound check with a break, and the increment, visits the loop body, and
// closes the loop with BRW_OPCODE_WHILE. Several lines are not visible in
// this extract, including whatever opens the loop and the conditions
// guarding the bound check and the increment.
526 vec4_visitor::visit(ir_loop *ir)
528 ir_dereference_variable *counter = NULL;
530 /* We don't want debugging output to print the whole body of the
531 * loop as the annotation.
533 this->base_ir = NULL;
// Dereference of the loop counter, created when the loop has one.
535 if (ir->counter != NULL)
536 counter = new(ir) ir_dereference_variable(ir->counter);
// A start value requires a counter; an assignment of ir->from to the
// counter is built here.
538 if (ir->from != NULL) {
539 assert(ir->counter != NULL);
541 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
// Bound check: a boolean expression using ir->cmp wrapped in an if whose
// then-branch holds a break, then visited so the code for it is emitted.
551 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
553 ir_if *if_stmt = new(ir) ir_if(e);
555 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
557 if_stmt->then_instructions.push_tail(brk);
559 if_stmt->accept(this);
566 visit_instructions(&ir->body_instructions);
// Increment: counter = counter + ir->increment, built as IR.
570 new(ir) ir_expression(ir_binop_add, counter->type,
571 counter, ir->increment);
573 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
580 emit(BRW_OPCODE_WHILE);
// Visitor for loop jumps: a break emits BRW_OPCODE_BREAK and a continue
// emits BRW_OPCODE_CONTINUE. The switch header is not visible in this
// extract.
584 vec4_visitor::visit(ir_loop_jump *ir)
587 case ir_loop_jump::jump_break:
588 emit(BRW_OPCODE_BREAK);
590 case ir_loop_jump::jump_continue:
591 emit(BRW_OPCODE_CONTINUE);
// Visitor entry for function signatures. The body is not visible in this
// extract.
598 vec4_visitor::visit(ir_function_signature *ir)
// Visitor for functions: only the function named main is processed. Its
// signature is looked up with an argument list named empty (declared on a
// line not visible in this extract) and the body of that signature is
// visited.
605 vec4_visitor::visit(ir_function *ir)
607 /* Ignore function bodies other than main() -- we shouldn't see calls to
608 * them since they should all be inlined.
610 if (strcmp(ir->name, "main") == 0) {
611 const ir_function_signature *sig;
614 sig = ir->matching_signature(&empty);
618 visit_instructions(&sig->body);
// Try to emit the expression as a saturating MOV. as_rvalue_to_saturate
// supplies the value to saturate; the handling of a null result and the
// return statements are not visible in this extract.
623 vec4_visitor::try_emit_sat(ir_expression *ir)
625 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
// Evaluate the value to be saturated.
629 sat_src->accept(this);
630 src_reg src = this->result;
// Allocate the result and MOV the value into it with the saturate flag set
// on the instruction.
632 this->result = src_reg(this, ir->type);
633 vec4_instruction *inst;
634 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
635 inst->saturate = true;
// Emit a comparison of src0 and src1 under operation op into dst, then AND
// dst with 1 so that only bit 0 of the comparison result remains.
641 vec4_visitor::emit_bool_comparison(unsigned int op,
642 dst_reg dst, src_reg src0, src_reg src1)
644 /* original gen4 does destination conversion before comparison. */
// The destination is retyped to the source type here. The condition
// guarding this assignment is not visible in this extract.
646 dst.type = src0.type;
648 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
649 inst->conditional_mod = brw_conditional_for_comparison(op);
// Switch the destination to the signed integer type for the masking AND.
651 dst.type = BRW_REGISTER_TYPE_D;
652 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
// Visitor for expressions. Evaluates each operand, allocates a temporary
// for the result, and emits the instruction sequence for the operation.
// this->result holds the source register of the value on exit. Many case
// labels and break statements are missing from this extract; comments
// below say so where a group cannot be attributed to a visible label.
656 vec4_visitor::visit(ir_expression *ir)
658 unsigned int operand;
659 src_reg op[Elements(ir->operands)];
662 vec4_instruction *inst;
// Expressions that can be emitted as a saturating MOV are handled by
// try_emit_sat (the guarded statement is not visible in this extract).
664 if (try_emit_sat(ir))
// Evaluate each operand. this->result.file is reset to BAD_FILE before each
// visit so that a visitor that produced nothing can be detected and
// reported.
667 for (operand = 0; operand < ir->get_num_operands(); operand++) {
668 this->result.file = BAD_FILE;
669 ir->operands[operand]->accept(this);
670 if (this->result.file == BAD_FILE) {
671 printf("Failed to get tree for expression operand:\n");
672 ir->operands[operand]->print();
675 op[operand] = this->result;
677 /* Matrix expression operands should have been broken down to vector
678 * operations already.
680 assert(!ir->operands[operand]->type->is_matrix());
// Widest vector width among the first two operands.
683 int vector_elements = ir->operands[0]->type->vector_elements;
684 if (ir->operands[1]) {
685 vector_elements = MAX2(vector_elements,
686 ir->operands[1]->type->vector_elements);
689 this->result.file = BAD_FILE;
691 /* Storage for our result. Ideally for an assignment we'd be using
692 * the actual storage for the result here, instead.
694 result_src = src_reg(this, ir->type);
695 /* convenience for the emit functions below. */
696 result_dst = dst_reg(result_src);
697 /* If nothing special happens, this is the result. */
698 this->result = result_src;
699 /* Limit writes to the channels that will be used by result_src later.
700 * This does limit this temp's use as a temporary for multi-instruction
703 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
705 switch (ir->operation) {
706 case ir_unop_logic_not:
707 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
708 * ones complement of the whole register, not just bit 0.
710 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
// Case label not visible: the negate flag of op[0] is toggled and op[0] is
// returned directly, with no instruction emitted.
713 op[0].negate = !op[0].negate;
714 this->result = op[0];
// Case label not visible, and a line before this one is missing: the negate
// flag of op[0] is cleared and op[0] is returned directly.
718 op[0].negate = false;
719 this->result = op[0];
// Case label not visible: writes 0.0, then overwrites with 1.0 under a
// predicate where op[0] is greater than zero, then with -1.0 where op[0] is
// less than zero.
723 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
725 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
726 inst->conditional_mod = BRW_CONDITIONAL_G;
727 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
728 inst->predicate = BRW_PREDICATE_NORMAL;
730 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
731 inst->conditional_mod = BRW_CONDITIONAL_L;
732 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
733 inst->predicate = BRW_PREDICATE_NORMAL;
// Math-unit operations; most of their case labels are not visible here.
738 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
742 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
745 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
749 assert(!"not reached: should be handled by ir_explog_to_explog2");
752 case ir_unop_sin_reduced:
753 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
756 case ir_unop_cos_reduced:
757 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
762 assert(!"derivatives not valid in vertex shader");
766 assert(!"not reached: should be handled by lower_noise");
// Arithmetic; case labels not visible. The asserts name the lowering pass
// expected to have removed the operation before this point.
770 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
773 assert(!"not reached: should be handled by ir_sub_to_add_neg");
777 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
780 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
782 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
// Comparisons: CMP into the result register through a copy of result_dst
// whose type is set to the operand type (the condition guarding that
// retyping is not visible), then AND with 1. this->result still equals
// result_src here, so the AND reads the CMP output.
786 case ir_binop_greater:
787 case ir_binop_lequal:
788 case ir_binop_gequal:
790 case ir_binop_nequal: {
791 dst_reg temp = result_dst;
792 /* original gen4 does implicit conversion before comparison. */
794 temp.type = op[0].type;
796 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
797 inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
798 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
802 case ir_binop_all_equal:
803 /* "==" operator producing a scalar boolean. */
// Vector operands: CMP with Z to the null register, write 0, then write 1
// under the ALIGN16_ALL4H predicate.
804 if (ir->operands[0]->type->is_vector() ||
805 ir->operands[1]->type->is_vector()) {
806 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
807 inst->conditional_mod = BRW_CONDITIONAL_Z;
809 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
810 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
811 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
// Scalar operands: CMP into the result, then AND with 1.
// NOTE(review): this equality path uses BRW_CONDITIONAL_NZ, the same
// modifier as the scalar path of the inequality case below, while the
// vector path above uses Z. Confirm which modifier is intended.
813 dst_reg temp = result_dst;
814 /* original gen4 does implicit conversion before comparison. */
816 temp.type = op[0].type;
818 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
819 inst->conditional_mod = BRW_CONDITIONAL_NZ;
820 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
823 case ir_binop_any_nequal:
824 /* "!=" operator producing a scalar boolean. */
// Vector operands: CMP with NZ to the null register, write 0, then write 1
// under the ALIGN16_ANY4H predicate.
825 if (ir->operands[0]->type->is_vector() ||
826 ir->operands[1]->type->is_vector()) {
827 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
828 inst->conditional_mod = BRW_CONDITIONAL_NZ;
830 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
831 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
832 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
// Scalar operands: CMP with NZ into the result, then AND with 1.
834 dst_reg temp = result_dst;
835 /* original gen4 does implicit conversion before comparison. */
837 temp.type = op[0].type;
839 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
840 inst->conditional_mod = BRW_CONDITIONAL_NZ;
841 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
// Case label not visible: CMP of op[0] against zero, write 0, then write 1
// under the ALIGN16_ANY4H predicate.
// NOTE(review): the CMP result is not captured, so no conditional modifier
// is set on it in the visible lines. Confirm that the predicate on the
// final MOV has a flag value to consume.
846 emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
847 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
849 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
850 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
// Logic operations map directly onto the bitwise opcodes.
853 case ir_binop_logic_xor:
854 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
857 case ir_binop_logic_or:
858 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
861 case ir_binop_logic_and:
862 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
// Case label not visible: dot product of two vectors of identical type.
866 assert(ir->operands[0]->type->is_vector());
867 assert(ir->operands[0]->type == ir->operands[1]->type);
868 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
872 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
875 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
// Case labels not visible: a plain MOV from op[0] into the result.
884 emit(BRW_OPCODE_MOV, result_dst, op[0]);
// Case labels not visible: compare op[0] against 0.0 with NZ into the
// result, then AND with 1.
888 dst_reg temp = result_dst;
889 /* original gen4 does implicit conversion before comparison. */
891 temp.type = op[0].type;
893 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
894 inst->conditional_mod = BRW_CONDITIONAL_NZ;
895 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
// Rounding family; most case labels not visible. The RNDD preceded by a
// negate of the operand also marks the result as negated.
900 emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
903 op[0].negate = !op[0].negate;
904 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
905 this->result.negate = true;
908 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
911 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
913 case ir_unop_round_even:
914 emit(BRW_OPCODE_RNDE, result_dst, op[0]);
// Case labels not visible: CMP with L followed by a predicated SEL, then
// the same pair with G.
918 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
919 inst->conditional_mod = BRW_CONDITIONAL_L;
921 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
922 inst->predicate = BRW_PREDICATE_NORMAL;
925 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
926 inst->conditional_mod = BRW_CONDITIONAL_G;
928 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
929 inst->predicate = BRW_PREDICATE_NORMAL;
933 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
// Bitwise operations map directly onto the matching opcodes.
936 case ir_unop_bit_not:
937 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
939 case ir_binop_bit_and:
940 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
942 case ir_binop_bit_xor:
943 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
945 case ir_binop_bit_or:
946 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
// Shifts are rejected with an assert.
949 case ir_binop_lshift:
950 case ir_binop_rshift:
951 assert(!"GLSL 1.30 features unsupported");
954 case ir_quadop_vector:
955 assert(!"not reached: should be handled by lower_quadop_vector");
// Visitor for swizzles used as rvalues. Evaluates the swizzled value and
// composes the requested mask with the swizzle already on that value.
// The declaration of src, the loop header and the switch over the channel
// index are not visible in this extract.
962 vec4_visitor::visit(ir_swizzle *ir)
968 /* Note that this is only swizzles in expressions, not those on the left
969 * hand side of an assignment, which do write masking. See ir_assignment
973 ir->val->accept(this);
975 assert(src.file != BAD_FILE);
// For channels inside the result width, look up the existing swizzle of src
// at the channel named by the corresponding mask field.
977 if (i < ir->type->vector_elements) {
980 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
983 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
986 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
989 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
994 /* Replicate the last channel out. */
995 swizzle[i] = swizzle[ir->type->vector_elements - 1];
998 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
// Visitor for variable dereferences: looks up the storage recorded for the
// variable and returns it as a source register in this->result.
1004 vec4_visitor::visit(ir_dereference_variable *ir)
1006 dst_reg *reg = variable_storage(ir->var);
// Failure path (its guarding condition is not visible in this extract):
// report the missing storage through fail and use the null register as the
// result.
1009 fail("Failed to find variable storage for %s\n", ir->var->name);
1010 this->result = src_reg(brw_null_reg());
1014 this->result = src_reg(*reg);
// Visitor for array dereferences. Evaluates the array, then offsets the
// register by index times element size when the index is a compile-time
// constant. The variable-index path is inside a disabled preprocessor
// region. The declaration of src and the final store to this->result are
// not visible in this extract.
1018 vec4_visitor::visit(ir_dereference_array *ir)
1020 ir_constant *constant_index;
1022 int element_size = type_size(ir->type);
1024 constant_index = ir->array_index->constant_expression_value();
1026 ir->array->accept(this);
1029 if (constant_index) {
1030 src.reg_offset += constant_index->value.i[0] * element_size;
1032 #if 0 /* Variable array index */
1033 /* Variable index array dereference. It eats the "vec4" of the
1034 * base of the array and an index that offsets the Mesa register
1037 ir->array_index->accept(this);
// The index is scaled by the element size unless that size is 1.
1041 if (element_size == 1) {
1042 index_reg = this->result;
1044 index_reg = src_reg(this, glsl_type::float_type);
1046 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
1047 this->result, src_reg_for_float(element_size));
// A copy of the index register is attached to src as its relative address.
1050 src.reladdr = ralloc(mem_ctx, src_reg);
1051 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1055 /* If the type is smaller than a vec4, replicate the last channel out. */
1056 if (ir->type->is_scalar() || ir->type->is_vector())
1057 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1059 src.swizzle = BRW_SWIZZLE_NOOP;
// Visitor for record field dereferences. Evaluates the record, then
// advances the register offset by the summed sizes of the fields that
// precede the named one.
1065 vec4_visitor::visit(ir_dereference_record *ir)
1068 const glsl_type *struct_type = ir->record->type;
1071 ir->record->accept(this);
// Accumulate field sizes until the field whose name equals ir->field is
// found. The statement guarded by the strcmp test is not visible in this
// extract (presumably it leaves the loop).
1073 for (i = 0; i < struct_type->length; i++) {
1074 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1076 offset += type_size(struct_type->fields.structure[i].type);
1079 /* If the type is smaller than a vec4, replicate the last channel out. */
1080 if (ir->type->is_scalar() || ir->type->is_vector())
1081 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1083 this->result.swizzle = BRW_SWIZZLE_NOOP;
1085 this->result.reg_offset += offset;
// Produces the destination register for the left-hand side of an
// assignment from the result left on visitor v. The call that evaluates ir
// through v is not visible in this extract.
1089 * We want to be careful in assignment setup to hit the actual storage
1090 * instead of potentially using a temporary like we might with the
1091 * ir_dereference handler.
1094 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1096 /* The LHS must be a dereference. If the LHS is a variable indexed array
1097 * access of a vector, it must be separated into a series conditional moves
1098 * before reaching this point (see ir_vec_index_to_cond_assign).
1100 assert(ir->as_dereference());
1101 ir_dereference_array *deref_array = ir->as_dereference_array();
// Array dereferences of a vector are rejected by the assert. The condition
// guarding it is not visible (presumably a null check on deref_array).
1103 assert(!deref_array->array->type->is_vector());
1106 /* Use the rvalue deref handler for the most part. We'll ignore
1107 * swizzles in it and write swizzles using writemask, though.
1110 return dst_reg(v->result);
// Copy an aggregate value for an assignment: one MOV per unit of
// type_size of the left-hand side, with all four channels enabled.
1114 vec4_visitor::emit_block_move(ir_assignment *ir)
1116 ir->rhs->accept(this);
1117 src_reg src = this->result;
1119 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1121 /* FINISHME: This should really set to the correct maximal writemask for each
1122 * FINISHME: component written (in the loops below).
1124 dst.writemask = WRITEMASK_XYZW;
// The loop emits a MOV from src to dst per slot. The lines that advance the
// register offsets between iterations, and the condition under which the
// predicate is set, are not visible in this extract.
1126 for (int i = 0; i < type_size(ir->lhs->type); i++) {
1127 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1129 inst->predicate = BRW_PREDICATE_NORMAL;
// Visitor for assignments. Aggregate left-hand sides are delegated to
// emit_block_move; scalar and vector ones are written with a MOV whose
// writemask and source swizzle are derived from ir->write_mask.
1137 vec4_visitor::visit(ir_assignment *ir)
1139 if (!ir->lhs->type->is_scalar() &&
1140 !ir->lhs->type->is_vector()) {
1141 emit_block_move(ir);
1145 /* Now we're down to just a scalar/vector with writemasks. */
1148 ir->rhs->accept(this);
1149 src_reg src = this->result;
1151 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1154 int first_enabled_chan = 0;
1157 assert(ir->lhs->type->is_vector());
1158 dst.writemask = ir->write_mask;
// Find the source swizzle entry at the position of an enabled channel.
// Whether the search stops at the first enabled channel depends on a line
// not visible in this extract.
1160 for (int i = 0; i < 4; i++) {
1161 if (dst.writemask & (1 << i)) {
1162 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1167 /* Swizzle a small RHS vector into the channels being written.
1169 * glsl ir treats write_mask as dictating how many channels are
1170 * present on the RHS while in our instructions we need to make
1171 * those channels appear in the slots of the vec4 they're written to.
// Enabled channels consume consecutive source channels through src_chan;
// the remaining slots receive first_enabled_chan.
1173 for (int i = 0; i < 4; i++) {
1174 if (dst.writemask & (1 << i))
1175 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1177 swizzles[i] = first_enabled_chan;
1179 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1180 swizzles[2], swizzles[3]);
// A conditional assignment first evaluates the condition into the flag.
1182 if (ir->condition) {
1183 emit_bool_to_cond_code(ir->condition);
// One MOV per unit of type_size of the left-hand side. The condition under
// which the predicate is set, and any offset updates between iterations,
// are not visible in this extract.
1186 for (i = 0; i < type_size(ir->lhs->type); i++) {
1187 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1190 inst->predicate = BRW_PREDICATE_NORMAL;
// Visitor for constants. Materializes the constant into a newly allocated
// register with MOV instructions and leaves that register in this->result.
// Structs, arrays, matrices and plain scalars or vectors are handled in
// turn. Lines that advance register offsets between MOVs are not visible
// in this extract.
1199 vec4_visitor::visit(ir_constant *ir)
// Struct: visit every field constant and copy it into the aggregate
// temporary.
1201 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1202 src_reg temp_base = src_reg(this, ir->type);
1203 dst_reg temp = dst_reg(temp_base);
1205 foreach_iter(exec_list_iterator, iter, ir->components) {
1206 ir_constant *field_value = (ir_constant *)iter.get();
1207 int size = type_size(field_value->type);
1211 field_value->accept(this);
1212 src_reg src = this->result;
// NOTE(review): the loop below compares a signed index against a value cast
// to unsigned int. Confirm the intended index type.
1214 for (int i = 0; i < (unsigned int)size; i++) {
1215 emit(BRW_OPCODE_MOV, temp, src);
1221 this->result = temp_base;
// Array: visit every element constant and copy it into the aggregate
// temporary.
1225 if (ir->type->is_array()) {
1226 src_reg temp_base = src_reg(this, ir->type);
1227 dst_reg temp = dst_reg(temp_base);
1228 int size = type_size(ir->type->fields.array);
1232 for (unsigned int i = 0; i < ir->type->length; i++) {
1233 ir->array_elements[i]->accept(this);
1234 src_reg src = this->result;
1235 for (int j = 0; j < size; j++) {
1236 emit(BRW_OPCODE_MOV, temp, src);
1242 this->result = temp_base;
// Matrix: one single-channel MOV of a float immediate per element, with the
// value index computed as column times vector_elements plus row.
1246 if (ir->type->is_matrix()) {
1247 this->result = src_reg(this, ir->type);
1248 dst_reg dst = dst_reg(this->result);
1250 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1252 for (int i = 0; i < ir->type->matrix_columns; i++) {
1253 for (int j = 0; j < ir->type->vector_elements; j++) {
1254 dst.writemask = 1 << j;
1255 emit(BRW_OPCODE_MOV, dst,
1256 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
// Scalar or vector: one single-channel MOV of an immediate per component,
// picking the value array that matches the base type.
// NOTE(review): this->result is allocated inside the per-channel loop, so
// each channel is written to a different new register and the result left
// behind is the one allocated for the last channel. Confirm whether the
// allocation was meant to sit before the loop.
1263 for (int i = 0; i < ir->type->vector_elements; i++) {
1264 this->result = src_reg(this, ir->type);
1265 dst_reg dst = dst_reg(this->result);
1267 dst.writemask = 1 << i;
1269 switch (ir->type->base_type) {
1270 case GLSL_TYPE_FLOAT:
1271 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
1274 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
1276 case GLSL_TYPE_UINT:
1277 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
1279 case GLSL_TYPE_BOOL:
1280 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
1283 assert(!"Non-float/uint/int/bool constant");
// Visitor for calls: asserts, since this node kind is not expected here.
1290 vec4_visitor::visit(ir_call *ir)
1292 assert(!"not reached");
// Visitor for texture operations: asserts, since this node kind is not
// handled here.
1296 vec4_visitor::visit(ir_texture *ir)
1298 assert(!"not reached");
// Visitor for return statements: asserts, since this node kind is not
// expected here.
1302 vec4_visitor::visit(ir_return *ir)
1304 assert(!"not reached");
// Visitor for discard statements: asserts, since this node kind is not
// expected here.
1308 vec4_visitor::visit(ir_discard *ir)
1310 assert(!"not reached");
// Visitor for if statements. Evaluates the condition, emits IF, the then
// block, an optional ELSE with the else block, and ENDIF. base_ir is set
// to the condition before each control-flow instruction is emitted.
1314 vec4_visitor::visit(ir_if *ir)
1316 this->base_ir = ir->condition;
1317 ir->condition->accept(this);
1318 assert(this->result.file != BAD_FILE);
// The IF is emitted without operands, so the evaluated condition is not
// attached to it in the visible lines, as the FINISHME notes.
1320 /* FINISHME: condcode */
1321 emit(BRW_OPCODE_IF);
1323 visit_instructions(&ir->then_instructions);
1325 if (!ir->else_instructions.is_empty()) {
1326 this->base_ir = ir->condition;
1327 emit(BRW_OPCODE_ELSE);
1329 visit_instructions(&ir->else_instructions);
1332 this->base_ir = ir->condition;
1333 emit(BRW_OPCODE_ENDIF);
// Emit the VUE header for hardware before gen6 into message registers
// starting at header_mrf: a header dword group, the NDC position, and the
// clip-space position, with a different layout for gen5. Several lines,
// including else branches and the return, are not visible in this extract.
1337 vec4_visitor::emit_vue_header_gen4(int header_mrf)
1339 /* Get the position */
1340 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1342 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1343 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
// ndc.w receives the reciprocal of pos.w.
1345 current_annotation = "NDC";
1346 dst_reg ndc_w = ndc;
1347 ndc_w.writemask = WRITEMASK_W;
1348 src_reg pos_w = pos;
1349 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1350 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
// ndc.xyz receives pos multiplied by the register holding that reciprocal.
1352 dst_reg ndc_xyz = ndc;
1353 ndc_xyz.writemask = WRITEMASK_XYZ;
1355 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
// A computed header is needed when point size is written, user clip planes
// are enabled, or the negative-rhw workaround applies.
1357 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1358 c->key.nr_userclip || brw->has_negative_rhw_bug) {
1359 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1362 emit(BRW_OPCODE_MOV, header1, 0u);
1364 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1365 assert(!"finishme: psiz");
1368 header1.writemask = WRITEMASK_W;
1369 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1370 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
// Per user clip plane: a DP4 of the position with the plane using the L
// conditional modifier, followed by an OR of bit i into header1.
// NOTE(review): inst still refers to the DP4 when the predicate is set, so
// the predicate lands on the DP4 and not on the OR. Confirm the intent.
1373 for (i = 0; i < c->key.nr_userclip; i++) {
1374 vec4_instruction *inst;
1376 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1377 pos, src_reg(c->userplane[i]));
1378 inst->conditional_mod = BRW_CONDITIONAL_L;
1380 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1381 inst->predicate = BRW_PREDICATE_NORMAL;
1384 /* i965 clipping workaround:
1385 * 1) Test for -ve rhw
1387 * set ndc = (0,0,0,0)
1390 * Later, clipping will detect ucp[6] and ensure the primitive is
1391 * clipped against all fixed planes.
// The workaround body uses brw_ style calls taking p, unlike the emit calls
// elsewhere in this function; the surrounding lines are not visible in this
// extract.
1393 if (brw->has_negative_rhw_bug) {
1397 vec8(brw_null_reg()),
1399 brw_swizzle1(ndc, 3),
1402 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1403 brw_MOV(p, ndc, brw_imm_f(0));
1404 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
// Write the computed header to the next message register. The MOV of zero
// that follows is on the other path (its else is not visible here).
1408 header1.writemask = WRITEMASK_XYZW;
1409 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1411 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1412 BRW_REGISTER_TYPE_UD), 0u);
1415 if (intel->gen == 5) {
1416 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1417 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1418 * dword 4-7 (m2) is the ndc position (set above)
1419 * dword 8-11 (m3) of the vertex header is the 4D space position
1420 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1421 * m6 is a pad so that the vertex element data is aligned
1422 * m7 is the first vertex data we fill, which is the vertex position.
1424 current_annotation = "NDC";
1425 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1427 current_annotation = "gl_Position";
1428 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1430 /* user clip distance. */
1433 /* Pad so that vertex element data (starts with position) is aligned. */
1436 /* There are 8 dwords in VUE header pre-Ironlake:
1437 * dword 0-3 (m1) is indices, point width, clip flags.
1438 * dword 4-7 (m2) is ndc position (set above)
1440 * dword 8-11 (m3) is the first vertex data, which we always have be the
1443 current_annotation = "NDC";
1444 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1446 current_annotation = "gl_Position";
1447 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
// Builds the Sandybridge (gen6) VUE header in consecutive message registers,
// starting at header_mrf:
//   1. a header register cleared to zero, with the VERT_RESULT_PSIZ output
//      moved into its W channel when the shader writes point size;
//   2. the VERT_RESULT_HPOS output (gl_Position);
//   3. when c->key.nr_userclip is non-zero, one DP4 per user clip plane.
// NOTE(review): this listing omits several lines of the function (local
// declarations, braces, the return statement). The caller assigns the result
// back to its MRF counter, so the return value is presumably the next free
// MRF -- confirm against the full file.
1454 vec4_visitor::emit_vue_header_gen6(int header_mrf)
1458 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1459 * dword 0-3 (m2) of the header is indices, point width, clip flags.
1460 * dword 4-7 (m3) is the 4D space position
1461 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1464 * m4 or 6 is the first vertex element data we fill, which is
1465 * the vertex position.
1468 current_annotation = "indices, point width, clip flags";
1469 reg = brw_message_reg(header_mrf++);
// Zero the whole register first; only the W channel is overwritten below.
1470 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1471 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1472 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1473 src_reg(output_reg[VERT_RESULT_PSIZ]));
1476 current_annotation = "gl_Position";
1477 emit(BRW_OPCODE_MOV,
1478 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1480 current_annotation = "user clip distances";
1481 if (c->key.nr_userclip) {
1482 for (int i = 0; i < c->key.nr_userclip; i++) {
// NOTE(review): the condition choosing between the two assignments below is
// not visible in this listing; presumably the first four planes target
// header_mrf and the rest target header_mrf + 1 -- confirm.
1485 m = brw_message_reg(header_mrf);
1487 m = brw_message_reg(header_mrf + 1);
// NOTE(review): two things to verify here. (a) Only the plane is passed as a
// source, while the DP4 that computes user clip flags earlier in this file
// passes both the position and the plane. (b) For i >= 4, 1 << (i & 7)
// selects bits 4-7, which lie outside a four-channel XYZW writemask, so
// (i & 3) may have been intended.
1489 emit(BRW_OPCODE_DP4,
1490 dst_reg(brw_writemask(m, 1 << (i & 7))),
1491 src_reg(c->userplane[i]));
1496 current_annotation = NULL;
// Adjusts a URB write message length for the interleaved layout.
//   brw:  context; only the hardware generation (brw->intel.gen) is read.
//   mlen: message length in registers. Judging by the comment below, the
//         count includes the message header register while the alignment
//         rule applies to the data that follows it.
// Only gen >= 6 is adjusted. NOTE(review): the statement guarded by the
// parity test and the return statement are not visible in this listing;
// presumably mlen is bumped by one and returned -- confirm.
1502 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1504 struct intel_context *intel = &brw->intel;
1506 if (intel->gen >= 6) {
1507 /* URB data written (does not include the message header reg) must
1508 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1509 * section 5.4.3.2.2: URB_INTERLEAVED.
1511 * URB entries are allocated on a multiple of 1024 bits, so an
1512 * extra 128 bits written here to make the end align to 256 is
// An even mlen means an odd number of data registers once the header
// register is excluded, which would break the two-register rule above.
1515 if ((mlen % 2) != 1)
1523 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1524 * complete the VS thread.
1526 * The VUE layout is documented in Volume 2a.
// Emits the VUE header, then one MOV per written vertex result into
// consecutive message registers, followed by a VS_OPCODE_URB_WRITE whose
// base_mrf and mlen cover that payload. Finally records the URB entry size
// in c->prog_data.urb_entry_size.
// NOTE(review): the declarations of mrf, base_mrf, attr and urb_entry_size,
// the else/continue/break statements and all braces are not visible in this
// listing; the notes below describe only what is shown.
1529 vec4_visitor::emit_urb_writes()
1535 /* FINISHME: edgeflag */
1537 /* First mrf is the g0-based message header containing URB handles and such,
1538 * which is implied in VS_OPCODE_URB_WRITE.
// The header layout differs by generation; the value returned by the chosen
// helper is stored back into mrf.
1542 if (intel->gen >= 6) {
1543 mrf = emit_vue_header_gen6(mrf);
1545 mrf = emit_vue_header_gen4(mrf);
1549 for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1550 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1553 /* This is loaded into the VUE header, and thus doesn't occupy
1554 * an attribute slot.
1556 if (attr == VERT_RESULT_PSIZ)
// Copy the output register of this attribute into the next message register.
1559 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1561 /* If this is MRF 15, we can't fit anything more into this URB
1562 * WRITE. Note that base_mrf of 1 means that MRF 15 is an
1563 * even-numbered amount of URB write data, which will meet
1564 * gen6's requirements for length alignment.
1570 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1571 inst->base_mrf = base_mrf;
// The raw register count is passed through align_interleaved_urb_mlen()
// before being used as the message length.
1572 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1575 urb_entry_size = mrf - base_mrf;
// attr is not reset, so this loop resumes where the one above stopped.
// Presumably any further written attribute would need a second URB write,
// which is reported through fail() -- confirm the skipped-attribute handling.
1577 for (; attr < VERT_RESULT_MAX; attr++) {
1578 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
// NOTE(review): fail() appends its own newline after the formatted text, so
// the trailing newline in this message yields an extra blank line.
1580 fail("Second URB write not supported.\n");
// urb_entry_size counts message registers at this point; it is stored
// divided by 8 on gen6 and by 4 otherwise, after ALIGN (presumably rounding
// up to that multiple).
1584 if (intel->gen == 6)
1585 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1587 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
// Sets up a visitor for one vertex shader compile.
//   c:      compile state; its prog_data is exposed through this->prog_data.
//   prog:   shader program (its use is not visible in this listing).
//   shader: stored in this->shader.
// The visitor starts with no failure recorded, no current IR node or
// annotation, and empty virtual GRF bookkeeping.
// NOTE(review): brw is read below, but its assignment, like that of some
// other members, is not visible in this listing.
1590 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
1591 struct gl_shader_program *prog,
1592 struct brw_shader *shader)
1597 this->intel = &brw->intel;
1598 this->ctx = &intel->ctx;
1600 this->shader = shader;
// Fresh ralloc context with a NULL parent; emit() and fail() allocate their
// instructions and messages from it.
1602 this->mem_ctx = ralloc_context(NULL);
1603 this->failed = false;
1605 this->base_ir = NULL;
1606 this->current_annotation = NULL;
1609 this->prog_data = &c->prog_data;
// Hash table built with the pointer hash and pointer compare callbacks.
// NOTE(review): presumably keyed by IR variable pointers -- confirm.
1611 this->variable_ht = hash_table_ctor(0,
1612 hash_table_pointer_hash,
1613 hash_table_pointer_compare);
1615 this->virtual_grf_sizes = NULL;
1616 this->virtual_grf_count = 0;
1617 this->virtual_grf_array_size = 0;
// Destroys the variable hash table created in the constructor.
// NOTE(review): no release of this->mem_ctx (created with
// ralloc_context(NULL) in the constructor) is visible in this listing. If it
// is not freed elsewhere, every visitor leaks its instructions and messages;
// a ralloc_free(this->mem_ctx) here would address that -- confirm.
1620 vec4_visitor::~vec4_visitor()
1622 hash_table_dtor(this->variable_ht);
// Records a compile failure message.
//   format, ...: printf-style format string and its arguments.
// The formatted text is allocated from mem_ctx, then wrapped with the
// VS compile failed prefix and a trailing newline, and stored in
// this->fail_msg. When DEBUG_VS is set in INTEL_DEBUG the message is also
// written to stderr.
// NOTE(review): the va_list declaration, va_end, and any update of
// this->failed are not visible in this listing -- confirm they exist.
1627 vec4_visitor::fail(const char *format, ...)
1637 va_start(va, format);
1638 msg = ralloc_vasprintf(mem_ctx, format, va);
// Second allocation: the caller text becomes the argument of the prefix
// format, so a caller message ending in a newline produces two newlines.
1640 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
1642 this->fail_msg = msg;
1644 if (INTEL_DEBUG & DEBUG_VS) {
1645 fprintf(stderr, "%s", msg);
1649 } /* namespace brw */