src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

   1 /*
   2  * Copyright © 2011 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_vec4.h"
  25 #include "main/macros.h"
  26
  27 namespace brw {
  28
  29 src_reg::src_reg(dst_reg reg)
  30 {
  31    init();
  32
  33    this->file = reg.file;
  34    this->reg = reg.reg;
  35    this->reg_offset = reg.reg_offset;
  36    this->type = reg.type;
  37
  38    int swizzles[4];
  39    int next_chan = 0;
  40    int last = 0;
  41
  42    for (int i = 0; i < 4; i++) {
  43       if (!(reg.writemask & (1 << i)))
  44          continue;
  45
  46       swizzles[next_chan++] = last = i;
  47    }
  48
  49    for (; next_chan < 4; next_chan++) {
  50       swizzles[next_chan] = last;
  51    }
  52
  53    this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
  54                                 swizzles[2], swizzles[3]);
  55 }
  56
  57 dst_reg::dst_reg(src_reg reg)
  58 {
  59    init();
  60
  61    this->file = reg.file;
  62    this->reg = reg.reg;
  63    this->reg_offset = reg.reg_offset;
  64    this->type = reg.type;
  65    this->writemask = WRITEMASK_XYZW;
  66 }
  67
  68 vec4_instruction *
  69 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
  70                    src_reg src0, src_reg src1, src_reg src2)
  71 {
  72    vec4_instruction *inst = new(mem_ctx) vec4_instruction();
  73
  74    inst->opcode = opcode;
  75    inst->dst = dst;
  76    inst->src[0] = src0;
  77    inst->src[1] = src1;
  78    inst->src[2] = src2;
  79    inst->ir = this->base_ir;
  80    inst->annotation = this->current_annotation;
  81
  82    this->instructions.push_tail(inst);
  83
  84    return inst;
  85 }
  86
  87
  88 vec4_instruction *
  89 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
  90 {
  91    return emit(opcode, dst, src0, src1, src_reg());
  92 }
  93
  94 vec4_instruction *
  95 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
  96 {
  97    assert(dst.writemask != 0);
  98    return emit(opcode, dst, src0, src_reg(), src_reg());
  99 }
 100
 101 vec4_instruction *
 102 vec4_visitor::emit(enum opcode opcode)
 103 {
 104    return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
 105 }
 106
 107 void
 108 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
 109 {
 110    static enum opcode dot_opcodes[] = {
 111       BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
 112    };
 113
 114    emit(dot_opcodes[elements - 2], dst, src0, src1);
 115 }
 116
 117 void
 118 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
 119 {
 120    /* The gen6 math instruction ignores the source modifiers --
 121     * swizzle, abs, negate, and at least some parts of the register
 122     * region description.  Move the source to the corresponding slots
 123     * of the destination generally work.
 124     */
 125    src_reg expanded = src_reg(this, glsl_type::float_type);
 126    emit(BRW_OPCODE_MOV, dst, src);
 127    src = expanded;
 128
 129    emit(opcode, dst, src);
 130 }
 131
 132 void
 133 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
 134 {
 135    vec4_instruction *inst = emit(opcode, dst, src);
 136    inst->base_mrf = 1;
 137    inst->mlen = 1;
 138 }
 139
 140 void
 141 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
 142 {
 143    switch (opcode) {
 144    case SHADER_OPCODE_RCP:
 145    case SHADER_OPCODE_RSQ:
 146    case SHADER_OPCODE_SQRT:
 147    case SHADER_OPCODE_EXP2:
 148    case SHADER_OPCODE_LOG2:
 149    case SHADER_OPCODE_SIN:
 150    case SHADER_OPCODE_COS:
 151       break;
 152    default:
 153       assert(!"not reached: bad math opcode");
 154       return;
 155    }
 156
 157    if (intel->gen >= 6) {
 158       return emit_math1_gen6(opcode, dst, src);
 159    } else {
 160       return emit_math1_gen4(opcode, dst, src);
 161    }
 162 }
 163
 164 void
 165 vec4_visitor::emit_math2_gen6(enum opcode opcode,
 166                               dst_reg dst, src_reg src0, src_reg src1)
 167 {
 168    src_reg expanded;
 169
 170    /* The gen6 math instruction ignores the source modifiers --
 171     * swizzle, abs, negate, and at least some parts of the register
 172     * region description.  Move the sources to temporaries to make it
 173     * generally work.
 174     */
 175
 176    expanded = src_reg(this, glsl_type::vec4_type);
 177    emit(BRW_OPCODE_MOV, dst, src0);
 178    src0 = expanded;
 179
 180    expanded = src_reg(this, glsl_type::vec4_type);
 181    emit(BRW_OPCODE_MOV, dst, src1);
 182    src1 = expanded;
 183
 184    emit(opcode, dst, src0, src1);
 185 }
 186
 187 void
 188 vec4_visitor::emit_math2_gen4(enum opcode opcode,
 189                               dst_reg dst, src_reg src0, src_reg src1)
 190 {
 191    vec4_instruction *inst = emit(opcode, dst, src0, src1);
 192    inst->base_mrf = 1;
 193    inst->mlen = 2;
 194 }
 195
 196 void
 197 vec4_visitor::emit_math(enum opcode opcode,
 198                         dst_reg dst, src_reg src0, src_reg src1)
 199 {
 200    assert(opcode == SHADER_OPCODE_POW);
 201
 202    if (intel->gen >= 6) {
 203       return emit_math2_gen6(opcode, dst, src0, src1);
 204    } else {
 205       return emit_math2_gen4(opcode, dst, src0, src1);
 206    }
 207 }
 208
 209 void
 210 vec4_visitor::visit_instructions(const exec_list *list)
 211 {
 212    foreach_iter(exec_list_iterator, iter, *list) {
 213       ir_instruction *ir = (ir_instruction *)iter.get();
 214
 215       base_ir = ir;
 216       ir->accept(this);
 217    }
 218 }
 219
 220
 221 static int
 222 type_size(const struct glsl_type *type)
 223 {
 224    unsigned int i;
 225    int size;
 226
 227    switch (type->base_type) {
 228    case GLSL_TYPE_UINT:
 229    case GLSL_TYPE_INT:
 230    case GLSL_TYPE_FLOAT:
 231    case GLSL_TYPE_BOOL:
 232       if (type->is_matrix()) {
 233          return type->matrix_columns;
 234       } else {
 235          /* Regardless of size of vector, it gets a vec4. This is bad
 236           * packing for things like floats, but otherwise arrays become a
 237           * mess.  Hopefully a later pass over the code can pack scalars
 238           * down if appropriate.
 239           */
 240          return 1;
 241       }
 242    case GLSL_TYPE_ARRAY:
 243       assert(type->length > 0);
 244       return type_size(type->fields.array) * type->length;
 245    case GLSL_TYPE_STRUCT:
 246       size = 0;
 247       for (i = 0; i < type->length; i++) {
 248          size += type_size(type->fields.structure[i].type);
 249       }
 250       return size;
 251    case GLSL_TYPE_SAMPLER:
 252       /* Samplers take up one slot in UNIFORMS[], but they're baked in
 253        * at link time.
 254        */
 255       return 1;
 256    default:
 257       assert(0);
 258       return 0;
 259    }
 260 }
 261
 262 int
 263 vec4_visitor::virtual_grf_alloc(int size)
 264 {
 265    if (virtual_grf_array_size <= virtual_grf_count) {
 266       if (virtual_grf_array_size == 0)
 267          virtual_grf_array_size = 16;
 268       else
 269          virtual_grf_array_size *= 2;
 270       virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
 271                                    virtual_grf_array_size);
 272    }
 273    virtual_grf_sizes[virtual_grf_count] = size;
 274    return virtual_grf_count++;
 275 }
 276
 277 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
 278 {
 279    init();
 280
 281    this->file = GRF;
 282    this->reg = v->virtual_grf_alloc(type_size(type));
 283
 284    if (type->is_array() || type->is_record()) {
 285       this->swizzle = BRW_SWIZZLE_NOOP;
 286    } else {
 287       this->swizzle = swizzle_for_size(type->vector_elements);
 288    }
 289
 290    this->type = brw_type_for_base_type(type);
 291 }
 292
 293 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
 294 {
 295    init();
 296
 297    this->file = GRF;
 298    this->reg = v->virtual_grf_alloc(type_size(type));
 299
 300    if (type->is_array() || type->is_record()) {
 301       this->writemask = WRITEMASK_XYZW;
 302    } else {
 303       this->writemask = (1 << type->vector_elements) - 1;
 304    }
 305
 306    this->type = brw_type_for_base_type(type);
 307 }
 308
 309 dst_reg *
 310 vec4_visitor::variable_storage(ir_variable *var)
 311 {
 312    return (dst_reg *)hash_table_find(this->variable_ht, var);
 313 }
 314
 315 void
 316 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
 317 {
 318    ir_expression *expr = ir->as_expression();
 319
 320    if (expr) {
 321       src_reg op[2];
 322       vec4_instruction *inst;
 323
 324       assert(expr->get_num_operands() <= 2);
 325       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 326          assert(expr->operands[i]->type->is_scalar());
 327
 328          expr->operands[i]->accept(this);
 329          op[i] = this->result;
 330       }
 331
 332       switch (expr->operation) {
 333       case ir_unop_logic_not:
 334          inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
 335          inst->conditional_mod = BRW_CONDITIONAL_Z;
 336          break;
 337
 338       case ir_binop_logic_xor:
 339          inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
 340          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 341          break;
 342
 343       case ir_binop_logic_or:
 344          inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
 345          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 346          break;
 347
 348       case ir_binop_logic_and:
 349          inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
 350          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 351          break;
 352
 353       case ir_unop_f2b:
 354          if (intel->gen >= 6) {
 355             inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
 356          } else {
 357             inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
 358          }
 359          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 360          break;
 361
 362       case ir_unop_i2b:
 363          if (intel->gen >= 6) {
 364             inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
 365          } else {
 366             inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
 367          }
 368          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 369          break;
 370
 371       case ir_binop_greater:
 372       case ir_binop_gequal:
 373       case ir_binop_less:
 374       case ir_binop_lequal:
 375       case ir_binop_equal:
 376       case ir_binop_all_equal:
 377       case ir_binop_nequal:
 378       case ir_binop_any_nequal:
 379          inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
 380          inst->conditional_mod =
 381             brw_conditional_for_comparison(expr->operation);
 382          break;
 383
 384       default:
 385          assert(!"not reached");
 386          break;
 387       }
 388       return;
 389    }
 390
 391    ir->accept(this);
 392
 393    if (intel->gen >= 6) {
 394       vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
 395                                this->result, src_reg(1));
 396       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 397    } else {
 398       vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
 399       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 400    }
 401 }
 402
 403 /**
 404  * Emit a gen6 IF statement with the comparison folded into the IF
 405  * instruction.
 406  */
 407 void
 408 vec4_visitor::emit_if_gen6(ir_if *ir)
 409 {
 410    ir_expression *expr = ir->condition->as_expression();
 411
 412    if (expr) {
 413       src_reg op[2];
 414       vec4_instruction *inst;
 415       dst_reg temp;
 416
 417       assert(expr->get_num_operands() <= 2);
 418       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 419          assert(expr->operands[i]->type->is_scalar());
 420
 421          expr->operands[i]->accept(this);
 422          op[i] = this->result;
 423       }
 424
 425       switch (expr->operation) {
 426       case ir_unop_logic_not:
 427          inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
 428          inst->conditional_mod = BRW_CONDITIONAL_Z;
 429          return;
 430
 431       case ir_binop_logic_xor:
 432          inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
 433          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 434          return;
 435
 436       case ir_binop_logic_or:
 437          temp = dst_reg(this, glsl_type::bool_type);
 438          emit(BRW_OPCODE_OR, temp, op[0], op[1]);
 439          inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
 440          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 441          return;
 442
 443       case ir_binop_logic_and:
 444          temp = dst_reg(this, glsl_type::bool_type);
 445          emit(BRW_OPCODE_AND, temp, op[0], op[1]);
 446          inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
 447          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 448          return;
 449
 450       case ir_unop_f2b:
 451          inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
 452          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 453          return;
 454
 455       case ir_unop_i2b:
 456          inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
 457          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 458          return;
 459
 460       case ir_binop_greater:
 461       case ir_binop_gequal:
 462       case ir_binop_less:
 463       case ir_binop_lequal:
 464       case ir_binop_equal:
 465       case ir_binop_all_equal:
 466       case ir_binop_nequal:
 467       case ir_binop_any_nequal:
 468          inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
 469          inst->conditional_mod =
 470             brw_conditional_for_comparison(expr->operation);
 471          return;
 472       default:
 473          assert(!"not reached");
 474          inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
 475          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 476          return;
 477       }
 478       return;
 479    }
 480
 481    ir->condition->accept(this);
 482
 483    vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
 484                             this->result, src_reg(0));
 485    inst->conditional_mod = BRW_CONDITIONAL_NZ;
 486 }
 487
 488 void
 489 vec4_visitor::visit(ir_variable *ir)
 490 {
 491    dst_reg *reg = NULL;
 492
 493    if (variable_storage(ir))
 494       return;
 495
 496    switch (ir->mode) {
 497    case ir_var_in:
 498       reg = new(mem_ctx) dst_reg(ATTR, ir->location);
 499       reg->type = brw_type_for_base_type(ir->type);
 500       hash_table_insert(this->variable_ht, reg, ir);
 501       break;
 502
 503    case ir_var_out:
 504       reg = new(mem_ctx) dst_reg(this, ir->type);
 505       hash_table_insert(this->variable_ht, reg, ir);
 506
 507       for (int i = 0; i < type_size(ir->type); i++) {
 508          output_reg[ir->location + i] = *reg;
 509          output_reg[ir->location + i].reg_offset = i;
 510       }
 511       break;
 512
 513    case ir_var_temporary:
 514       reg = new(mem_ctx) dst_reg(this, ir->type);
 515       hash_table_insert(this->variable_ht, reg, ir);
 516
 517       break;
 518
 519    case ir_var_uniform:
 520       /* FINISHME: uniforms */
 521       break;
 522    }
 523 }
 524
 525 void
 526 vec4_visitor::visit(ir_loop *ir)
 527 {
 528    ir_dereference_variable *counter = NULL;
 529
 530    /* We don't want debugging output to print the whole body of the
 531     * loop as the annotation.
 532     */
 533    this->base_ir = NULL;
 534
 535    if (ir->counter != NULL)
 536       counter = new(ir) ir_dereference_variable(ir->counter);
 537
 538    if (ir->from != NULL) {
 539       assert(ir->counter != NULL);
 540
 541       ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
 542
 543       a->accept(this);
 544       delete a;
 545    }
 546
 547    emit(BRW_OPCODE_DO);
 548
 549    if (ir->to) {
 550       ir_expression *e =
 551          new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
 552                                counter, ir->to);
 553       ir_if *if_stmt =  new(ir) ir_if(e);
 554
 555       ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
 556
 557       if_stmt->then_instructions.push_tail(brk);
 558
 559       if_stmt->accept(this);
 560
 561       delete if_stmt;
 562       delete e;
 563       delete brk;
 564    }
 565
 566    visit_instructions(&ir->body_instructions);
 567
 568    if (ir->increment) {
 569       ir_expression *e =
 570          new(ir) ir_expression(ir_binop_add, counter->type,
 571                                counter, ir->increment);
 572
 573       ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
 574
 575       a->accept(this);
 576       delete a;
 577       delete e;
 578    }
 579
 580    emit(BRW_OPCODE_WHILE);
 581 }
 582
 583 void
 584 vec4_visitor::visit(ir_loop_jump *ir)
 585 {
 586    switch (ir->mode) {
 587    case ir_loop_jump::jump_break:
 588       emit(BRW_OPCODE_BREAK);
 589       break;
 590    case ir_loop_jump::jump_continue:
 591       emit(BRW_OPCODE_CONTINUE);
 592       break;
 593    }
 594 }
 595
 596
 597 void
 598 vec4_visitor::visit(ir_function_signature *ir)
 599 {
 600    assert(0);
 601    (void)ir;
 602 }
 603
 604 void
 605 vec4_visitor::visit(ir_function *ir)
 606 {
 607    /* Ignore function bodies other than main() -- we shouldn't see calls to
 608     * them since they should all be inlined.
 609     */
 610    if (strcmp(ir->name, "main") == 0) {
 611       const ir_function_signature *sig;
 612       exec_list empty;
 613
 614       sig = ir->matching_signature(&empty);
 615
 616       assert(sig);
 617
 618       visit_instructions(&sig->body);
 619    }
 620 }
 621
 622 GLboolean
 623 vec4_visitor::try_emit_sat(ir_expression *ir)
 624 {
 625    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
 626    if (!sat_src)
 627       return false;
 628
 629    sat_src->accept(this);
 630    src_reg src = this->result;
 631
 632    this->result = src_reg(this, ir->type);
 633    vec4_instruction *inst;
 634    inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
 635    inst->saturate = true;
 636
 637    return true;
 638 }
 639
 640 void
 641 vec4_visitor::emit_bool_comparison(unsigned int op,
 642                                  dst_reg dst, src_reg src0, src_reg src1)
 643 {
 644    /* original gen4 does destination conversion before comparison. */
 645    if (intel->gen < 5)
 646       dst.type = src0.type;
 647
 648    vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
 649    inst->conditional_mod = brw_conditional_for_comparison(op);
 650
 651    dst.type = BRW_REGISTER_TYPE_D;
 652    emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
 653 }
 654
 655 void
 656 vec4_visitor::visit(ir_expression *ir)
 657 {
 658    unsigned int operand;
 659    src_reg op[Elements(ir->operands)];
 660    src_reg result_src;
 661    dst_reg result_dst;
 662    vec4_instruction *inst;
 663
 664    if (try_emit_sat(ir))
 665       return;
 666
 667    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 668       this->result.file = BAD_FILE;
 669       ir->operands[operand]->accept(this);
 670       if (this->result.file == BAD_FILE) {
 671          printf("Failed to get tree for expression operand:\n");
 672          ir->operands[operand]->print();
 673          exit(1);
 674       }
 675       op[operand] = this->result;
 676
 677       /* Matrix expression operands should have been broken down to vector
 678        * operations already.
 679        */
 680       assert(!ir->operands[operand]->type->is_matrix());
 681    }
 682
 683    int vector_elements = ir->operands[0]->type->vector_elements;
 684    if (ir->operands[1]) {
 685       vector_elements = MAX2(vector_elements,
 686                              ir->operands[1]->type->vector_elements);
 687    }
 688
 689    this->result.file = BAD_FILE;
 690
 691    /* Storage for our result.  Ideally for an assignment we'd be using
 692     * the actual storage for the result here, instead.
 693     */
 694    result_src = src_reg(this, ir->type);
 695    /* convenience for the emit functions below. */
 696    result_dst = dst_reg(result_src);
 697    /* If nothing special happens, this is the result. */
 698    this->result = result_src;
 699    /* Limit writes to the channels that will be used by result_src later.
 700     * This does limit this temp's use as a temporary for multi-instruction
 701     * sequences.
 702     */
 703    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 704
 705    switch (ir->operation) {
 706    case ir_unop_logic_not:
 707       /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
 708        * ones complement of the whole register, not just bit 0.
 709        */
 710       emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
 711       break;
 712    case ir_unop_neg:
 713       op[0].negate = !op[0].negate;
 714       this->result = op[0];
 715       break;
 716    case ir_unop_abs:
 717       op[0].abs = true;
 718       op[0].negate = false;
 719       this->result = op[0];
 720       break;
 721
 722    case ir_unop_sign:
 723       emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
 724
 725       inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
 726       inst->conditional_mod = BRW_CONDITIONAL_G;
 727       inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
 728       inst->predicate = BRW_PREDICATE_NORMAL;
 729
 730       inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
 731       inst->conditional_mod = BRW_CONDITIONAL_L;
 732       inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
 733       inst->predicate = BRW_PREDICATE_NORMAL;
 734
 735       break;
 736
 737    case ir_unop_rcp:
 738       emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
 739       break;
 740
 741    case ir_unop_exp2:
 742       emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
 743       break;
 744    case ir_unop_log2:
 745       emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
 746       break;
 747    case ir_unop_exp:
 748    case ir_unop_log:
 749       assert(!"not reached: should be handled by ir_explog_to_explog2");
 750       break;
 751    case ir_unop_sin:
 752    case ir_unop_sin_reduced:
 753       emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
 754       break;
 755    case ir_unop_cos:
 756    case ir_unop_cos_reduced:
 757       emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
 758       break;
 759
 760    case ir_unop_dFdx:
 761    case ir_unop_dFdy:
 762       assert(!"derivatives not valid in vertex shader");
 763       break;
 764
 765    case ir_unop_noise:
 766       assert(!"not reached: should be handled by lower_noise");
 767       break;
 768
 769    case ir_binop_add:
 770       emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
 771       break;
 772    case ir_binop_sub:
 773       assert(!"not reached: should be handled by ir_sub_to_add_neg");
 774       break;
 775
 776    case ir_binop_mul:
 777       emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
 778       break;
 779    case ir_binop_div:
 780       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
 781    case ir_binop_mod:
 782       assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
 783       break;
 784
 785    case ir_binop_less:
 786    case ir_binop_greater:
 787    case ir_binop_lequal:
 788    case ir_binop_gequal:
 789    case ir_binop_equal:
 790    case ir_binop_nequal: {
 791       dst_reg temp = result_dst;
 792       /* original gen4 does implicit conversion before comparison. */
 793       if (intel->gen < 5)
 794          temp.type = op[0].type;
 795
 796       inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
 797       inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
 798       emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
 799       break;
 800    }
 801
 802    case ir_binop_all_equal:
 803       /* "==" operator producing a scalar boolean. */
 804       if (ir->operands[0]->type->is_vector() ||
 805           ir->operands[1]->type->is_vector()) {
 806          inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
 807          inst->conditional_mod = BRW_CONDITIONAL_Z;
 808
 809          emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
 810          inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
 811          inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
 812       } else {
 813          dst_reg temp = result_dst;
 814          /* original gen4 does implicit conversion before comparison. */
 815          if (intel->gen < 5)
 816             temp.type = op[0].type;
 817
 818          inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
 819          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 820          emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
 821       }
 822       break;
 823    case ir_binop_any_nequal:
 824       /* "!=" operator producing a scalar boolean. */
 825       if (ir->operands[0]->type->is_vector() ||
 826           ir->operands[1]->type->is_vector()) {
 827          inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
 828          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 829
 830          emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
 831          inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
 832          inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 833       } else {
 834          dst_reg temp = result_dst;
 835          /* original gen4 does implicit conversion before comparison. */
 836          if (intel->gen < 5)
 837             temp.type = op[0].type;
 838
 839          inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
 840          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 841          emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
 842       }
 843       break;
 844
 845    case ir_unop_any:
 846       emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
 847       emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
 848
 849       inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
 850       inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 851       break;
 852
 853    case ir_binop_logic_xor:
 854       emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
 855       break;
 856
 857    case ir_binop_logic_or:
 858       emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
 859       break;
 860
 861    case ir_binop_logic_and:
 862       emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
 863       break;
 864
 865    case ir_binop_dot:
 866       assert(ir->operands[0]->type->is_vector());
 867       assert(ir->operands[0]->type == ir->operands[1]->type);
 868       emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
 869       break;
 870
 871    case ir_unop_sqrt:
 872       emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
 873       break;
 874    case ir_unop_rsq:
 875       emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
 876       break;
 877    case ir_unop_i2f:
 878    case ir_unop_i2u:
 879    case ir_unop_u2i:
 880    case ir_unop_u2f:
 881    case ir_unop_b2f:
 882    case ir_unop_b2i:
 883    case ir_unop_f2i:
 884       emit(BRW_OPCODE_MOV, result_dst, op[0]);
 885       break;
 886    case ir_unop_f2b:
 887    case ir_unop_i2b: {
 888       dst_reg temp = result_dst;
 889       /* original gen4 does implicit conversion before comparison. */
 890       if (intel->gen < 5)
 891          temp.type = op[0].type;
 892
 893       inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
 894       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 895       inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
 896       break;
 897    }
 898
 899    case ir_unop_trunc:
 900       emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
 901       break;
 902    case ir_unop_ceil:
 903       op[0].negate = !op[0].negate;
 904       inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
 905       this->result.negate = true;
 906       break;
 907    case ir_unop_floor:
 908       inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
 909       break;
 910    case ir_unop_fract:
 911       inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
 912       break;
 913    case ir_unop_round_even:
 914       emit(BRW_OPCODE_RNDE, result_dst, op[0]);
 915       break;
 916
 917    case ir_binop_min:
 918       inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
 919       inst->conditional_mod = BRW_CONDITIONAL_L;
 920
 921       inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
 922       inst->predicate = BRW_PREDICATE_NORMAL;
 923       break;
 924    case ir_binop_max:
 925       inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
 926       inst->conditional_mod = BRW_CONDITIONAL_G;
 927
 928       inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
 929       inst->predicate = BRW_PREDICATE_NORMAL;
 930       break;
 931
 932    case ir_binop_pow:
 933       emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
 934       break;
 935
 936    case ir_unop_bit_not:
 937       inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
 938       break;
 939    case ir_binop_bit_and:
 940       inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
 941       break;
 942    case ir_binop_bit_xor:
 943       inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
 944       break;
 945    case ir_binop_bit_or:
 946       inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
 947       break;
 948
 949    case ir_binop_lshift:
 950    case ir_binop_rshift:
 951       assert(!"GLSL 1.30 features unsupported");
 952       break;
 953
 954    case ir_quadop_vector:
 955       assert(!"not reached: should be handled by lower_quadop_vector");
 956       break;
 957    }
 958 }
 959
 960
 961 void
 962 vec4_visitor::visit(ir_swizzle *ir)
 963 {
 964    src_reg src;
 965    int i = 0;
 966    int swizzle[4];
 967
 968    /* Note that this is only swizzles in expressions, not those on the left
 969     * hand side of an assignment, which do write masking.  See ir_assignment
 970     * for that.
 971     */
 972
 973    ir->val->accept(this);
 974    src = this->result;
 975    assert(src.file != BAD_FILE);
 976
 977    if (i < ir->type->vector_elements) {
 978       switch (i) {
 979       case 0:
 980          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
 981          break;
 982       case 1:
 983          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
 984          break;
 985       case 2:
 986          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
 987          break;
 988       case 3:
 989          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
 990             break;
 991       }
 992    }
 993    for (; i < 4; i++) {
 994       /* Replicate the last channel out. */
 995       swizzle[i] = swizzle[ir->type->vector_elements - 1];
 996    }
 997
 998    src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
 999
1000    this->result = src;
1001 }
1002
1003 void
1004 vec4_visitor::visit(ir_dereference_variable *ir)
1005 {
1006    dst_reg *reg = variable_storage(ir->var);
1007
1008    if (!reg) {
1009       fail("Failed to find variable storage for %s\n", ir->var->name);
1010       this->result = src_reg(brw_null_reg());
1011       return;
1012    }
1013
1014    this->result = src_reg(*reg);
1015 }
1016
1017 void
1018 vec4_visitor::visit(ir_dereference_array *ir)
1019 {
1020    ir_constant *constant_index;
1021    src_reg src;
1022    int element_size = type_size(ir->type);
1023
1024    constant_index = ir->array_index->constant_expression_value();
1025
1026    ir->array->accept(this);
1027    src = this->result;
1028
1029    if (constant_index) {
1030       src.reg_offset += constant_index->value.i[0] * element_size;
1031    } else {
1032 #if 0 /* Variable array index */
1033       /* Variable index array dereference.  It eats the "vec4" of the
1034        * base of the array and an index that offsets the Mesa register
1035        * index.
1036        */
1037       ir->array_index->accept(this);
1038
1039       src_reg index_reg;
1040
1041       if (element_size == 1) {
1042          index_reg = this->result;
1043       } else {
1044          index_reg = src_reg(this, glsl_type::float_type);
1045
1046          emit(BRW_OPCODE_MUL, dst_reg(index_reg),
1047               this->result, src_reg_for_float(element_size));
1048       }
1049
1050       src.reladdr = ralloc(mem_ctx, src_reg);
1051       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1052 #endif
1053    }
1054
1055    /* If the type is smaller than a vec4, replicate the last channel out. */
1056    if (ir->type->is_scalar() || ir->type->is_vector())
1057       src.swizzle = swizzle_for_size(ir->type->vector_elements);
1058    else
1059       src.swizzle = BRW_SWIZZLE_NOOP;
1060
1061    this->result = src;
1062 }
1063
1064 void
1065 vec4_visitor::visit(ir_dereference_record *ir)
1066 {
1067    unsigned int i;
1068    const glsl_type *struct_type = ir->record->type;
1069    int offset = 0;
1070
1071    ir->record->accept(this);
1072
1073    for (i = 0; i < struct_type->length; i++) {
1074       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1075          break;
1076       offset += type_size(struct_type->fields.structure[i].type);
1077    }
1078
1079    /* If the type is smaller than a vec4, replicate the last channel out. */
1080    if (ir->type->is_scalar() || ir->type->is_vector())
1081       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1082    else
1083       this->result.swizzle = BRW_SWIZZLE_NOOP;
1084
1085    this->result.reg_offset += offset;
1086 }
1087
1088 /**
1089  * We want to be careful in assignment setup to hit the actual storage
1090  * instead of potentially using a temporary like we might with the
1091  * ir_dereference handler.
1092  */
1093 static dst_reg
1094 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1095 {
1096    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1097     * access of a vector, it must be separated into a series conditional moves
1098     * before reaching this point (see ir_vec_index_to_cond_assign).
1099     */
1100    assert(ir->as_dereference());
1101    ir_dereference_array *deref_array = ir->as_dereference_array();
1102    if (deref_array) {
1103       assert(!deref_array->array->type->is_vector());
1104    }
1105
1106    /* Use the rvalue deref handler for the most part.  We'll ignore
1107     * swizzles in it and write swizzles using writemask, though.
1108     */
1109    ir->accept(v);
1110    return dst_reg(v->result);
1111 }
1112
1113 void
1114 vec4_visitor::emit_block_move(ir_assignment *ir)
1115 {
1116    ir->rhs->accept(this);
1117    src_reg src = this->result;
1118
1119    dst_reg dst = get_assignment_lhs(ir->lhs, this);
1120
1121    /* FINISHME: This should really set to the correct maximal writemask for each
1122     * FINISHME: component written (in the loops below).
1123     */
1124    dst.writemask = WRITEMASK_XYZW;
1125
1126    for (int i = 0; i < type_size(ir->lhs->type); i++) {
1127       vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1128       if (ir->condition)
1129          inst->predicate = BRW_PREDICATE_NORMAL;
1130
1131       dst.reg_offset++;
1132       src.reg_offset++;
1133    }
1134 }
1135
1136 void
1137 vec4_visitor::visit(ir_assignment *ir)
1138 {
1139    if (!ir->lhs->type->is_scalar() &&
1140        !ir->lhs->type->is_vector()) {
1141       emit_block_move(ir);
1142       return;
1143    }
1144
1145    /* Now we're down to just a scalar/vector with writemasks. */
1146    int i;
1147
1148    ir->rhs->accept(this);
1149    src_reg src = this->result;
1150
1151    dst_reg dst = get_assignment_lhs(ir->lhs, this);
1152
1153    int swizzles[4];
1154    int first_enabled_chan = 0;
1155    int src_chan = 0;
1156
1157    assert(ir->lhs->type->is_vector());
1158    dst.writemask = ir->write_mask;
1159
1160    for (int i = 0; i < 4; i++) {
1161       if (dst.writemask & (1 << i)) {
1162          first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1163          break;
1164       }
1165    }
1166
1167    /* Swizzle a small RHS vector into the channels being written.
1168     *
1169     * glsl ir treats write_mask as dictating how many channels are
1170     * present on the RHS while in our instructions we need to make
1171     * those channels appear in the slots of the vec4 they're written to.
1172     */
1173    for (int i = 0; i < 4; i++) {
1174       if (dst.writemask & (1 << i))
1175          swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1176       else
1177          swizzles[i] = first_enabled_chan;
1178    }
1179    src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1180                               swizzles[2], swizzles[3]);
1181
1182    if (ir->condition) {
1183       emit_bool_to_cond_code(ir->condition);
1184    }
1185
1186    for (i = 0; i < type_size(ir->lhs->type); i++) {
1187       vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1188
1189       if (ir->condition)
1190          inst->predicate = BRW_PREDICATE_NORMAL;
1191
1192       dst.reg_offset++;
1193       src.reg_offset++;
1194    }
1195 }
1196
1197
1198 void
1199 vec4_visitor::visit(ir_constant *ir)
1200 {
1201    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1202       src_reg temp_base = src_reg(this, ir->type);
1203       dst_reg temp = dst_reg(temp_base);
1204
1205       foreach_iter(exec_list_iterator, iter, ir->components) {
1206          ir_constant *field_value = (ir_constant *)iter.get();
1207          int size = type_size(field_value->type);
1208
1209          assert(size > 0);
1210
1211          field_value->accept(this);
1212          src_reg src = this->result;
1213
1214          for (int i = 0; i < (unsigned int)size; i++) {
1215             emit(BRW_OPCODE_MOV, temp, src);
1216
1217             src.reg_offset++;
1218             temp.reg_offset++;
1219          }
1220       }
1221       this->result = temp_base;
1222       return;
1223    }
1224
1225    if (ir->type->is_array()) {
1226       src_reg temp_base = src_reg(this, ir->type);
1227       dst_reg temp = dst_reg(temp_base);
1228       int size = type_size(ir->type->fields.array);
1229
1230       assert(size > 0);
1231
1232       for (unsigned int i = 0; i < ir->type->length; i++) {
1233          ir->array_elements[i]->accept(this);
1234          src_reg src = this->result;
1235          for (int j = 0; j < size; j++) {
1236             emit(BRW_OPCODE_MOV, temp, src);
1237
1238             src.reg_offset++;
1239             temp.reg_offset++;
1240          }
1241       }
1242       this->result = temp_base;
1243       return;
1244    }
1245
1246    if (ir->type->is_matrix()) {
1247       this->result = src_reg(this, ir->type);
1248       dst_reg dst = dst_reg(this->result);
1249
1250       assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1251
1252       for (int i = 0; i < ir->type->matrix_columns; i++) {
1253          for (int j = 0; j < ir->type->vector_elements; j++) {
1254             dst.writemask = 1 << j;
1255             emit(BRW_OPCODE_MOV, dst,
1256                  src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1257          }
1258          dst.reg_offset++;
1259       }
1260       return;
1261    }
1262
1263    for (int i = 0; i < ir->type->vector_elements; i++) {
1264       this->result = src_reg(this, ir->type);
1265       dst_reg dst = dst_reg(this->result);
1266
1267       dst.writemask = 1 << i;
1268
1269       switch (ir->type->base_type) {
1270       case GLSL_TYPE_FLOAT:
1271          emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
1272          break;
1273       case GLSL_TYPE_INT:
1274          emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
1275          break;
1276       case GLSL_TYPE_UINT:
1277          emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
1278          break;
1279       case GLSL_TYPE_BOOL:
1280          emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
1281          break;
1282       default:
1283          assert(!"Non-float/uint/int/bool constant");
1284          break;
1285       }
1286    }
1287 }
1288
/**
 * Function calls are not expected to reach this visitor; the assertion
 * fires if one does.  With assertions compiled out this is a no-op.
 *
 * NOTE(review): presumably calls are inlined before this backend runs --
 * confirm against the lowering passes that precede it.
 */
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}
1294
/**
 * Texture operations are not expected to reach this visitor; the assertion
 * fires if one does.  With assertions compiled out this is a no-op.
 */
void
vec4_visitor::visit(ir_texture *ir)
{
   assert(!"not reached");
}
1300
/**
 * Return statements are not expected to reach this visitor; the assertion
 * fires if one does.  With assertions compiled out this is a no-op.
 */
void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}
1306
/**
 * Discards are not expected to reach this visitor; the assertion fires if
 * one does.  With assertions compiled out this is a no-op.
 */
void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}
1312
1313 void
1314 vec4_visitor::visit(ir_if *ir)
1315 {
1316    this->base_ir = ir->condition;
1317    ir->condition->accept(this);
1318    assert(this->result.file != BAD_FILE);
1319
1320    /* FINISHME: condcode */
1321    emit(BRW_OPCODE_IF);
1322
1323    visit_instructions(&ir->then_instructions);
1324
1325    if (!ir->else_instructions.is_empty()) {
1326       this->base_ir = ir->condition;
1327       emit(BRW_OPCODE_ELSE);
1328
1329       visit_instructions(&ir->else_instructions);
1330    }
1331
1332    this->base_ir = ir->condition;
1333    emit(BRW_OPCODE_ENDIF);
1334 }
1335
1336 int
1337 vec4_visitor::emit_vue_header_gen4(int header_mrf)
1338 {
1339    /* Get the position */
1340    src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1341
1342    /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1343    dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1344
1345    current_annotation = "NDC";
1346    dst_reg ndc_w = ndc;
1347    ndc_w.writemask = WRITEMASK_W;
1348    src_reg pos_w = pos;
1349    pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1350    emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1351
1352    dst_reg ndc_xyz = ndc;
1353    ndc_xyz.writemask = WRITEMASK_XYZ;
1354
1355    emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1356
1357    if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1358        c->key.nr_userclip || brw->has_negative_rhw_bug) {
1359       dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1360       GLuint i;
1361
1362       emit(BRW_OPCODE_MOV, header1, 0u);
1363
1364       if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1365          assert(!"finishme: psiz");
1366          src_reg psiz;
1367
1368          header1.writemask = WRITEMASK_W;
1369          emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1370          emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1371       }
1372
1373       for (i = 0; i < c->key.nr_userclip; i++) {
1374          vec4_instruction *inst;
1375
1376          inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1377                      pos, src_reg(c->userplane[i]));
1378          inst->conditional_mod = BRW_CONDITIONAL_L;
1379
1380          emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1381          inst->predicate = BRW_PREDICATE_NORMAL;
1382       }
1383
1384       /* i965 clipping workaround:
1385        * 1) Test for -ve rhw
1386        * 2) If set,
1387        *      set ndc = (0,0,0,0)
1388        *      set ucp[6] = 1
1389        *
1390        * Later, clipping will detect ucp[6] and ensure the primitive is
1391        * clipped against all fixed planes.
1392        */
1393       if (brw->has_negative_rhw_bug) {
1394 #if 0
1395          /* FINISHME */
1396          brw_CMP(p,
1397                  vec8(brw_null_reg()),
1398                  BRW_CONDITIONAL_L,
1399                  brw_swizzle1(ndc, 3),
1400                  brw_imm_f(0));
1401
1402          brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1403          brw_MOV(p, ndc, brw_imm_f(0));
1404          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1405 #endif
1406       }
1407
1408       header1.writemask = WRITEMASK_XYZW;
1409       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1410    } else {
1411       emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1412                                   BRW_REGISTER_TYPE_UD), 0u);
1413    }
1414
1415    if (intel->gen == 5) {
1416       /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1417        * dword 0-3 (m1) of the header is indices, point width, clip flags.
1418        * dword 4-7 (m2) is the ndc position (set above)
1419        * dword 8-11 (m3) of the vertex header is the 4D space position
1420        * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1421        * m6 is a pad so that the vertex element data is aligned
1422        * m7 is the first vertex data we fill, which is the vertex position.
1423        */
1424       current_annotation = "NDC";
1425       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1426
1427       current_annotation = "gl_Position";
1428       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1429
1430       /* user clip distance. */
1431       header_mrf += 2;
1432
1433       /* Pad so that vertex element data (starts with position) is aligned. */
1434       header_mrf++;
1435    } else {
1436       /* There are 8 dwords in VUE header pre-Ironlake:
1437        * dword 0-3 (m1) is indices, point width, clip flags.
1438        * dword 4-7 (m2) is ndc position (set above)
1439        *
1440        * dword 8-11 (m3) is the first vertex data, which we always have be the
1441        * vertex position.
1442        */
1443       current_annotation = "NDC";
1444       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1445
1446       current_annotation = "gl_Position";
1447       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1448    }
1449
1450    return header_mrf;
1451 }
1452
1453 int
1454 vec4_visitor::emit_vue_header_gen6(int header_mrf)
1455 {
1456    struct brw_reg reg;
1457
1458    /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1459     * dword 0-3 (m2) of the header is indices, point width, clip flags.
1460     * dword 4-7 (m3) is the 4D space position
1461     * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1462     * enabled.
1463     *
1464     * m4 or 6 is the first vertex element data we fill, which is
1465     * the vertex position.
1466     */
1467
1468    current_annotation = "indices, point width, clip flags";
1469    reg = brw_message_reg(header_mrf++);
1470    emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1471    if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1472       emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1473            src_reg(output_reg[VERT_RESULT_PSIZ]));
1474    }
1475
1476    current_annotation = "gl_Position";
1477    emit(BRW_OPCODE_MOV,
1478         brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1479
1480    current_annotation = "user clip distances";
1481    if (c->key.nr_userclip) {
1482       for (int i = 0; i < c->key.nr_userclip; i++) {
1483          struct brw_reg m;
1484          if (i < 4)
1485             m = brw_message_reg(header_mrf);
1486          else
1487             m = brw_message_reg(header_mrf + 1);
1488
1489          emit(BRW_OPCODE_DP4,
1490               dst_reg(brw_writemask(m, 1 << (i & 7))),
1491               src_reg(c->userplane[i]));
1492       }
1493       header_mrf += 2;
1494    }
1495
1496    current_annotation = NULL;
1497
1498    return header_mrf;
1499 }
1500
1501 static int
1502 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1503 {
1504    struct intel_context *intel = &brw->intel;
1505
1506    if (intel->gen >= 6) {
1507       /* URB data written (does not include the message header reg) must
1508        * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
1509        * section 5.4.3.2.2: URB_INTERLEAVED.
1510        *
1511        * URB entries are allocated on a multiple of 1024 bits, so an
1512        * extra 128 bits written here to make the end align to 256 is
1513        * no problem.
1514        */
1515       if ((mlen % 2) != 1)
1516          mlen++;
1517    }
1518
1519    return mlen;
1520 }
1521
1522 /**
1523  * Generates the VUE payload plus the 1 or 2 URB write instructions to
1524  * complete the VS thread.
1525  *
1526  * The VUE layout is documented in Volume 2a.
1527  */
1528 void
1529 vec4_visitor::emit_urb_writes()
1530 {
1531    int base_mrf = 1;
1532    int mrf = base_mrf;
1533    int urb_entry_size;
1534
1535    /* FINISHME: edgeflag */
1536
1537    /* First mrf is the g0-based message header containing URB handles and such,
1538     * which is implied in VS_OPCODE_URB_WRITE.
1539     */
1540    mrf++;
1541
1542    if (intel->gen >= 6) {
1543       mrf = emit_vue_header_gen6(mrf);
1544    } else {
1545       mrf = emit_vue_header_gen4(mrf);
1546    }
1547
1548    int attr;
1549    for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1550       if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1551          continue;
1552
1553       /* This is loaded into the VUE header, and thus doesn't occupy
1554        * an attribute slot.
1555        */
1556       if (attr == VERT_RESULT_PSIZ)
1557          continue;
1558
1559       emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1560
1561       /* If this is MRF 15, we can't fit anything more into this URB
1562        * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
1563        * even-numbered amount of URB write data, which will meet
1564        * gen6's requirements for length alignment.
1565        */
1566       if (mrf == 15)
1567          break;
1568    }
1569
1570    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1571    inst->base_mrf = base_mrf;
1572    inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1573    inst->eot = true;
1574
1575    urb_entry_size = mrf - base_mrf;
1576
1577    for (; attr < VERT_RESULT_MAX; attr++) {
1578       if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1579          continue;
1580       fail("Second URB write not supported.\n");
1581       break;
1582    }
1583
1584    if (intel->gen == 6)
1585       c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1586    else
1587       c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
1588 }
1589
1590 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
1591                            struct gl_shader_program *prog,
1592                            struct brw_shader *shader)
1593 {
1594    this->c = c;
1595    this->p = &c->func;
1596    this->brw = p->brw;
1597    this->intel = &brw->intel;
1598    this->ctx = &intel->ctx;
1599    this->prog = prog;
1600    this->shader = shader;
1601
1602    this->mem_ctx = ralloc_context(NULL);
1603    this->failed = false;
1604
1605    this->base_ir = NULL;
1606    this->current_annotation = NULL;
1607
1608    this->c = c;
1609    this->prog_data = &c->prog_data;
1610
1611    this->variable_ht = hash_table_ctor(0,
1612                                        hash_table_pointer_hash,
1613                                        hash_table_pointer_compare);
1614
1615    this->virtual_grf_sizes = NULL;
1616    this->virtual_grf_count = 0;
1617    this->virtual_grf_array_size = 0;
1618 }
1619
1620 vec4_visitor::~vec4_visitor()
1621 {
1622    hash_table_dtor(this->variable_ht);
1623 }
1624
1625
1626 void
1627 vec4_visitor::fail(const char *format, ...)
1628 {
1629    va_list va;
1630    char *msg;
1631
1632    if (failed)
1633       return;
1634
1635    failed = true;
1636
1637    va_start(va, format);
1638    msg = ralloc_vasprintf(mem_ctx, format, va);
1639    va_end(va);
1640    msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
1641
1642    this->fail_msg = msg;
1643
1644    if (INTEL_DEBUG & DEBUG_VS) {
1645       fprintf(stderr, "%s",  msg);
1646    }
1647 }
1648
1649 } /* namespace brw */