src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
/* Flag bit carried in the "mask" argument of the emit_* helpers, next
 * to the four per-channel write bits (1<<0 .. 1<<3).  Helpers that
 * honor it enable saturation on the instruction producing the result.
 */
#define SATURATE (1<<5)
  38
  39 /* Not quite sure how correct this is - need to understand horiz
  40  * vs. vertical strides a little better.
  41  */
  42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  43 {
  44    if (reg.vstride)
  45       reg.nr++;
  46    return reg;
  47 }
  48
  49 /* Payload R0:
  50  *
  51  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  52  *         corresponding to each of the 16 execution channels.
  53  * R0.1..8 -- ?
  54  * R1.0 -- triangle vertex 0.X
  55  * R1.1 -- triangle vertex 0.Y
  56  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  57  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  58  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  59  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  60  * R1.6 -- ?
  61  * R1.7 -- ?
  62  * R1.8 -- ?
  63  */
  64
  65
  66 static void emit_pixel_xy(struct brw_compile *p,
  67                           const struct brw_reg *dst,
  68                           GLuint mask)
  69 {
  70    struct brw_reg r1 = brw_vec1_grf(1, 0);
  71    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  72
  73    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  74
  75    /* Calculate pixel centers by adding 1 or 0 to each of the
  76     * micro-tile coordinates passed in r1.
  77     */
  78    if (mask & WRITEMASK_X) {
  79       brw_ADD(p,
  80               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  81               stride(suboffset(r1_uw, 4), 2, 4, 0),
  82               brw_imm_v(0x10101010));
  83    }
  84
  85    if (mask & WRITEMASK_Y) {
  86       brw_ADD(p,
  87               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  88               stride(suboffset(r1_uw,5), 2, 4, 0),
  89               brw_imm_v(0x11001100));
  90    }
  91
  92    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  93 }
  94
  95
  96
  97 static void emit_delta_xy(struct brw_compile *p,
  98                           const struct brw_reg *dst,
  99                           GLuint mask,
 100                           const struct brw_reg *arg0)
 101 {
 102    struct brw_reg r1 = brw_vec1_grf(1, 0);
 103
 104    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 105     * centers.
 106     */
 107    if (mask & WRITEMASK_X) {
 108       brw_ADD(p,
 109               dst[0],
 110               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 111               negate(r1));
 112    }
 113
 114    if (mask & WRITEMASK_Y) {
 115       brw_ADD(p,
 116               dst[1],
 117               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 118               negate(suboffset(r1,1)));
 119
 120    }
 121 }
 122
 123 static void emit_wpos_xy(struct brw_wm_compile *c,
 124                          const struct brw_reg *dst,
 125                          GLuint mask,
 126                          const struct brw_reg *arg0)
 127 {
 128    struct brw_compile *p = &c->func;
 129
 130    /* Calculate the pixel offset from window bottom left into destination
 131     * X and Y channels.
 132     */
 133    if (mask & WRITEMASK_X) {
 134       /* X' = X - origin */
 135       brw_ADD(p,
 136               dst[0],
 137               retype(arg0[0], BRW_REGISTER_TYPE_W),
 138               brw_imm_d(0 - c->key.origin_x));
 139    }
 140
 141    if (mask & WRITEMASK_Y) {
 142       /* Y' = height - (Y - origin_y) = height + origin_y - Y */
 143       brw_ADD(p,
 144               dst[1],
 145               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 146               brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
 147    }
 148 }
 149
 150
 151 static void emit_pixel_w( struct brw_compile *p,
 152                           const struct brw_reg *dst,
 153                           GLuint mask,
 154                           const struct brw_reg *arg0,
 155                           const struct brw_reg *deltas)
 156 {
 157    /* Don't need this if all you are doing is interpolating color, for
 158     * instance.
 159     */
 160    if (mask & WRITEMASK_W) {
 161       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 162
 163       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 164        * result straight into a message reg.
 165        */
 166       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 167       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 168
 169       /* Calc w */
 170       brw_math_16( p, dst[3],
 171                    BRW_MATH_FUNCTION_INV,
 172                    BRW_MATH_SATURATE_NONE,
 173                    2, brw_null_reg(),
 174                    BRW_MATH_PRECISION_FULL);
 175    }
 176 }
 177
 178
 179
 180 static void emit_linterp( struct brw_compile *p,
 181                          const struct brw_reg *dst,
 182                          GLuint mask,
 183                          const struct brw_reg *arg0,
 184                          const struct brw_reg *deltas )
 185 {
 186    struct brw_reg interp[4];
 187    GLuint nr = arg0[0].nr;
 188    GLuint i;
 189
 190    interp[0] = brw_vec1_grf(nr, 0);
 191    interp[1] = brw_vec1_grf(nr, 4);
 192    interp[2] = brw_vec1_grf(nr+1, 0);
 193    interp[3] = brw_vec1_grf(nr+1, 4);
 194
 195    for (i = 0; i < 4; i++) {
 196       if (mask & (1<<i)) {
 197          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 198          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 199       }
 200    }
 201 }
 202
 203
 204 static void emit_pinterp( struct brw_compile *p,
 205                           const struct brw_reg *dst,
 206                           GLuint mask,
 207                           const struct brw_reg *arg0,
 208                           const struct brw_reg *deltas,
 209                           const struct brw_reg *w)
 210 {
 211    struct brw_reg interp[4];
 212    GLuint nr = arg0[0].nr;
 213    GLuint i;
 214
 215    interp[0] = brw_vec1_grf(nr, 0);
 216    interp[1] = brw_vec1_grf(nr, 4);
 217    interp[2] = brw_vec1_grf(nr+1, 0);
 218    interp[3] = brw_vec1_grf(nr+1, 4);
 219
 220    for (i = 0; i < 4; i++) {
 221       if (mask & (1<<i)) {
 222          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 223          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 224       }
 225    }
 226    for (i = 0; i < 4; i++) {
 227       if (mask & (1<<i)) {
 228          brw_MUL(p, dst[i], dst[i], w[3]);
 229       }
 230    }
 231 }
 232
 233
 234 static void emit_cinterp( struct brw_compile *p,
 235                          const struct brw_reg *dst,
 236                          GLuint mask,
 237                          const struct brw_reg *arg0 )
 238 {
 239    struct brw_reg interp[4];
 240    GLuint nr = arg0[0].nr;
 241    GLuint i;
 242
 243    interp[0] = brw_vec1_grf(nr, 0);
 244    interp[1] = brw_vec1_grf(nr, 4);
 245    interp[2] = brw_vec1_grf(nr+1, 0);
 246    interp[3] = brw_vec1_grf(nr+1, 4);
 247
 248    for (i = 0; i < 4; i++) {
 249       if (mask & (1<<i)) {
 250          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 251       }
 252    }
 253 }
 254
 255 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 256 static void emit_frontfacing( struct brw_compile *p,
 257                               const struct brw_reg *dst,
 258                               GLuint mask )
 259 {
 260    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 261    GLuint i;
 262
 263    if (!(mask & WRITEMASK_XYZW))
 264       return;
 265
 266    for (i = 0; i < 4; i++) {
 267       if (mask & (1<<i)) {
 268          brw_MOV(p, dst[i], brw_imm_f(0.0));
 269       }
 270    }
 271
 272    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 273     * us front face
 274     */
 275    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 276    for (i = 0; i < 4; i++) {
 277       if (mask & (1<<i)) {
 278          brw_MOV(p, dst[i], brw_imm_f(1.0));
 279       }
 280    }
 281    brw_set_predicate_control_flag_value(p, 0xff);
 282 }
 283
 284 static void emit_alu1( struct brw_compile *p,
 285                        struct brw_instruction *(*func)(struct brw_compile *,
 286                                                        struct brw_reg,
 287                                                        struct brw_reg),
 288                        const struct brw_reg *dst,
 289                        GLuint mask,
 290                        const struct brw_reg *arg0 )
 291 {
 292    GLuint i;
 293
 294    if (mask & SATURATE)
 295       brw_set_saturate(p, 1);
 296
 297    for (i = 0; i < 4; i++) {
 298       if (mask & (1<<i)) {
 299          func(p, dst[i], arg0[i]);
 300       }
 301    }
 302
 303    if (mask & SATURATE)
 304       brw_set_saturate(p, 0);
 305 }
 306
 307
 308 static void emit_alu2( struct brw_compile *p,
 309                        struct brw_instruction *(*func)(struct brw_compile *,
 310                                                        struct brw_reg,
 311                                                        struct brw_reg,
 312                                                        struct brw_reg),
 313                        const struct brw_reg *dst,
 314                        GLuint mask,
 315                        const struct brw_reg *arg0,
 316                        const struct brw_reg *arg1 )
 317 {
 318    GLuint i;
 319
 320    if (mask & SATURATE)
 321       brw_set_saturate(p, 1);
 322
 323    for (i = 0; i < 4; i++) {
 324       if (mask & (1<<i)) {
 325          func(p, dst[i], arg0[i], arg1[i]);
 326       }
 327    }
 328
 329    if (mask & SATURATE)
 330       brw_set_saturate(p, 0);
 331 }
 332
 333
 334 static void emit_mad( struct brw_compile *p,
 335                       const struct brw_reg *dst,
 336                       GLuint mask,
 337                       const struct brw_reg *arg0,
 338                       const struct brw_reg *arg1,
 339                       const struct brw_reg *arg2 )
 340 {
 341    GLuint i;
 342
 343    for (i = 0; i < 4; i++) {
 344       if (mask & (1<<i)) {
 345          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 346
 347          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 348          brw_ADD(p, dst[i], dst[i], arg2[i]);
 349          brw_set_saturate(p, 0);
 350       }
 351    }
 352 }
 353
 354 static void emit_trunc( struct brw_compile *p,
 355                       const struct brw_reg *dst,
 356                       GLuint mask,
 357                       const struct brw_reg *arg0)
 358 {
 359    GLuint i;
 360
 361    for (i = 0; i < 4; i++) {
 362       if (mask & (1<<i)) {
 363          brw_RNDZ(p, dst[i], arg0[i]);
 364       }
 365    }
 366 }
 367
 368 static void emit_lrp( struct brw_compile *p,
 369                       const struct brw_reg *dst,
 370                       GLuint mask,
 371                       const struct brw_reg *arg0,
 372                       const struct brw_reg *arg1,
 373                       const struct brw_reg *arg2 )
 374 {
 375    GLuint i;
 376
 377    /* Uses dst as a temporary:
 378     */
 379    for (i = 0; i < 4; i++) {
 380       if (mask & (1<<i)) {
 381          /* Can I use the LINE instruction for this?
 382           */
 383          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 384          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 385
 386          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 387          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 388          brw_set_saturate(p, 0);
 389       }
 390    }
 391 }
 392
 393 static void emit_sop( struct brw_compile *p,
 394                       const struct brw_reg *dst,
 395                       GLuint mask,
 396                       GLuint cond,
 397                       const struct brw_reg *arg0,
 398                       const struct brw_reg *arg1 )
 399 {
 400    GLuint i;
 401
 402    for (i = 0; i < 4; i++) {
 403       if (mask & (1<<i)) {
 404          brw_MOV(p, dst[i], brw_imm_f(0));
 405          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 406          brw_MOV(p, dst[i], brw_imm_f(1.0));
 407          brw_set_predicate_control_flag_value(p, 0xff);
 408       }
 409    }
 410 }
 411
 412 static void emit_slt( struct brw_compile *p,
 413                       const struct brw_reg *dst,
 414                       GLuint mask,
 415                       const struct brw_reg *arg0,
 416                       const struct brw_reg *arg1 )
 417 {
 418    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 419 }
 420
 421 static void emit_sle( struct brw_compile *p,
 422                       const struct brw_reg *dst,
 423                       GLuint mask,
 424                       const struct brw_reg *arg0,
 425                       const struct brw_reg *arg1 )
 426 {
 427    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 428 }
 429
 430 static void emit_sgt( struct brw_compile *p,
 431                       const struct brw_reg *dst,
 432                       GLuint mask,
 433                       const struct brw_reg *arg0,
 434                       const struct brw_reg *arg1 )
 435 {
 436    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 437 }
 438
 439 static void emit_sge( struct brw_compile *p,
 440                       const struct brw_reg *dst,
 441                       GLuint mask,
 442                       const struct brw_reg *arg0,
 443                       const struct brw_reg *arg1 )
 444 {
 445    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 446 }
 447
 448 static void emit_seq( struct brw_compile *p,
 449                       const struct brw_reg *dst,
 450                       GLuint mask,
 451                       const struct brw_reg *arg0,
 452                       const struct brw_reg *arg1 )
 453 {
 454    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 455 }
 456
 457 static void emit_sne( struct brw_compile *p,
 458                       const struct brw_reg *dst,
 459                       GLuint mask,
 460                       const struct brw_reg *arg0,
 461                       const struct brw_reg *arg1 )
 462 {
 463    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 464 }
 465
 466 static void emit_cmp( struct brw_compile *p,
 467                       const struct brw_reg *dst,
 468                       GLuint mask,
 469                       const struct brw_reg *arg0,
 470                       const struct brw_reg *arg1,
 471                       const struct brw_reg *arg2 )
 472 {
 473    GLuint i;
 474
 475    for (i = 0; i < 4; i++) {
 476       if (mask & (1<<i)) {
 477          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 478          brw_MOV(p, dst[i], arg2[i]);
 479          brw_set_saturate(p, 0);
 480
 481          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 482
 483          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 484          brw_MOV(p, dst[i], arg1[i]);
 485          brw_set_saturate(p, 0);
 486          brw_set_predicate_control_flag_value(p, 0xff);
 487       }
 488    }
 489 }
 490
 491 static void emit_max( struct brw_compile *p,
 492                       const struct brw_reg *dst,
 493                       GLuint mask,
 494                       const struct brw_reg *arg0,
 495                       const struct brw_reg *arg1 )
 496 {
 497    GLuint i;
 498
 499    for (i = 0; i < 4; i++) {
 500       if (mask & (1<<i)) {
 501          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 502          brw_MOV(p, dst[i], arg0[i]);
 503          brw_set_saturate(p, 0);
 504
 505          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 506
 507          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 508          brw_MOV(p, dst[i], arg1[i]);
 509          brw_set_saturate(p, 0);
 510          brw_set_predicate_control_flag_value(p, 0xff);
 511       }
 512    }
 513 }
 514
 515 static void emit_min( struct brw_compile *p,
 516                       const struct brw_reg *dst,
 517                       GLuint mask,
 518                       const struct brw_reg *arg0,
 519                       const struct brw_reg *arg1 )
 520 {
 521    GLuint i;
 522
 523    for (i = 0; i < 4; i++) {
 524       if (mask & (1<<i)) {
 525          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 526          brw_MOV(p, dst[i], arg1[i]);
 527          brw_set_saturate(p, 0);
 528
 529          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 530
 531          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 532          brw_MOV(p, dst[i], arg0[i]);
 533          brw_set_saturate(p, 0);
 534          brw_set_predicate_control_flag_value(p, 0xff);
 535       }
 536    }
 537 }
 538
 539
 540 static void emit_dp3( struct brw_compile *p,
 541                       const struct brw_reg *dst,
 542                       GLuint mask,
 543                       const struct brw_reg *arg0,
 544                       const struct brw_reg *arg1 )
 545 {
 546    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 547
 548    if (!(mask & WRITEMASK_XYZW))
 549       return; /* Do not emit dead code */
 550
 551    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 552
 553    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 554    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 555
 556    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 557    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 558    brw_set_saturate(p, 0);
 559 }
 560
 561
 562 static void emit_dp4( struct brw_compile *p,
 563                       const struct brw_reg *dst,
 564                       GLuint mask,
 565                       const struct brw_reg *arg0,
 566                       const struct brw_reg *arg1 )
 567 {
 568    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 569
 570    if (!(mask & WRITEMASK_XYZW))
 571       return; /* Do not emit dead code */
 572
 573    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 574
 575    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 576    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 577    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 578
 579    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 580    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 581    brw_set_saturate(p, 0);
 582 }
 583
 584
 585 static void emit_dph( struct brw_compile *p,
 586                       const struct brw_reg *dst,
 587                       GLuint mask,
 588                       const struct brw_reg *arg0,
 589                       const struct brw_reg *arg1 )
 590 {
 591    if (!(mask & WRITEMASK_XYZW))
 592       return; /* Do not emit dead code */
 593
 594    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 595
 596    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 597    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 598    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 599
 600    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 601    brw_ADD(p, dst[0], dst[0], arg1[3]);
 602    brw_set_saturate(p, 0);
 603 }
 604
 605
 606 static void emit_xpd( struct brw_compile *p,
 607                       const struct brw_reg *dst,
 608                       GLuint mask,
 609                       const struct brw_reg *arg0,
 610                       const struct brw_reg *arg1 )
 611 {
 612    GLuint i;
 613
 614    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 615
 616    for (i = 0 ; i < 3; i++) {
 617       if (mask & (1<<i)) {
 618          GLuint i2 = (i+2)%3;
 619          GLuint i1 = (i+1)%3;
 620
 621          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 622
 623          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 624          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 625          brw_set_saturate(p, 0);
 626       }
 627    }
 628 }
 629
 630
 631 static void emit_math1( struct brw_compile *p,
 632                         GLuint function,
 633                         const struct brw_reg *dst,
 634                         GLuint mask,
 635                         const struct brw_reg *arg0 )
 636 {
 637    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 638
 639    if (!(mask & WRITEMASK_XYZW))
 640       return; /* Do not emit dead code */
 641
 642    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 643
 644    brw_MOV(p, brw_message_reg(2), arg0[0]);
 645
 646    /* Send two messages to perform all 16 operations:
 647     */
 648    brw_math_16(p,
 649                dst[dst_chan],
 650                function,
 651                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 652                2,
 653                brw_null_reg(),
 654                BRW_MATH_PRECISION_FULL);
 655 }
 656
 657
 658 static void emit_math2( struct brw_compile *p,
 659                         GLuint function,
 660                         const struct brw_reg *dst,
 661                         GLuint mask,
 662                         const struct brw_reg *arg0,
 663                         const struct brw_reg *arg1)
 664 {
 665    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 666
 667    if (!(mask & WRITEMASK_XYZW))
 668       return; /* Do not emit dead code */
 669
 670    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 671
 672    brw_push_insn_state(p);
 673
 674    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 675    brw_MOV(p, brw_message_reg(2), arg0[0]);
 676    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 677    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 678
 679    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 680    brw_MOV(p, brw_message_reg(3), arg1[0]);
 681    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 682    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 683
 684
 685    /* Send two messages to perform all 16 operations:
 686     */
 687    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 688    brw_math(p,
 689             dst[dst_chan],
 690             function,
 691             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 692             2,
 693             brw_null_reg(),
 694             BRW_MATH_DATA_VECTOR,
 695             BRW_MATH_PRECISION_FULL);
 696
 697    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 698    brw_math(p,
 699             offset(dst[dst_chan],1),
 700             function,
 701             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 702             4,
 703             brw_null_reg(),
 704             BRW_MATH_DATA_VECTOR,
 705             BRW_MATH_PRECISION_FULL);
 706
 707    brw_pop_insn_state(p);
 708 }
 709
 710
 711
 712 static void emit_tex( struct brw_wm_compile *c,
 713                       const struct brw_wm_instruction *inst,
 714                       struct brw_reg *dst,
 715                       GLuint dst_flags,
 716                       struct brw_reg *arg )
 717 {
 718    struct brw_compile *p = &c->func;
 719    GLuint msgLength, responseLength;
 720    GLuint i, nr;
 721    GLuint emit;
 722    GLuint msg_type;
 723
 724    /* How many input regs are there?
 725     */
 726    switch (inst->tex_idx) {
 727    case TEXTURE_1D_INDEX:
 728       emit = WRITEMASK_X;
 729       nr = 1;
 730       break;
 731    case TEXTURE_2D_INDEX:
 732    case TEXTURE_RECT_INDEX:
 733       emit = WRITEMASK_XY;
 734       nr = 2;
 735       break;
 736    default:
 737       emit = WRITEMASK_XYZ;
 738       nr = 3;
 739       break;
 740    }
 741
 742    if (inst->tex_shadow) {
 743       nr = 4;
 744       emit |= WRITEMASK_W;
 745    }
 746
 747    msgLength = 1;
 748
 749    for (i = 0; i < nr; i++) {
 750       static const GLuint swz[4] = {0,1,2,2};
 751       if (emit & (1<<i))
 752          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 753       else
 754          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 755       msgLength += 2;
 756    }
 757
 758    responseLength = 8;          /* always */
 759
 760    if (BRW_IS_IGDNG(p->brw)) {
 761        if (inst->tex_shadow)
 762            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
 763        else
 764            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
 765    } else {
 766        if (inst->tex_shadow)
 767            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 768        else
 769            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 770    }
 771
 772    brw_SAMPLE(p,
 773               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 774               1,
 775               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 776               SURF_INDEX_TEXTURE(inst->tex_unit),
 777               inst->tex_unit,     /* sampler */
 778               inst->writemask,
 779               msg_type,
 780               responseLength,
 781               msgLength,
 782               0,
 783               1,
 784               BRW_SAMPLER_SIMD_MODE_SIMD16);
 785 }
 786
 787
 788 static void emit_txb( struct brw_wm_compile *c,
 789                       const struct brw_wm_instruction *inst,
 790                       struct brw_reg *dst,
 791                       GLuint dst_flags,
 792                       struct brw_reg *arg )
 793 {
 794    struct brw_compile *p = &c->func;
 795    GLuint msgLength;
 796    GLuint msg_type;
 797    /* Shadow ignored for txb.
 798     */
 799    switch (inst->tex_idx) {
 800    case TEXTURE_1D_INDEX:
 801       brw_MOV(p, brw_message_reg(2), arg[0]);
 802       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 803       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 804       break;
 805    case TEXTURE_2D_INDEX:
 806    case TEXTURE_RECT_INDEX:
 807       brw_MOV(p, brw_message_reg(2), arg[0]);
 808       brw_MOV(p, brw_message_reg(4), arg[1]);
 809       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 810       break;
 811    default:
 812       brw_MOV(p, brw_message_reg(2), arg[0]);
 813       brw_MOV(p, brw_message_reg(4), arg[1]);
 814       brw_MOV(p, brw_message_reg(6), arg[2]);
 815       break;
 816    }
 817
 818    brw_MOV(p, brw_message_reg(8), arg[3]);
 819    msgLength = 9;
 820
 821    if (BRW_IS_IGDNG(p->brw))
 822        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
 823    else
 824        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 825
 826    brw_SAMPLE(p,
 827               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 828               1,
 829               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 830               SURF_INDEX_TEXTURE(inst->tex_unit),
 831               inst->tex_unit,     /* sampler */
 832               inst->writemask,
 833               msg_type,
 834               8,                /* responseLength */
 835               msgLength,
 836               0,
 837               1,
 838               BRW_SAMPLER_SIMD_MODE_SIMD16);
 839 }
 840
 841
 842 static void emit_lit( struct brw_compile *p,
 843                       const struct brw_reg *dst,
 844                       GLuint mask,
 845                       const struct brw_reg *arg0 )
 846 {
 847    assert((mask & WRITEMASK_XW) == 0);
 848
 849    if (mask & WRITEMASK_Y) {
 850       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 851       brw_MOV(p, dst[1], arg0[0]);
 852       brw_set_saturate(p, 0);
 853    }
 854
 855    if (mask & WRITEMASK_Z) {
 856       emit_math2(p, BRW_MATH_FUNCTION_POW,
 857                  &dst[2],
 858                  WRITEMASK_X | (mask & SATURATE),
 859                  &arg0[1],
 860                  &arg0[3]);
 861    }
 862
 863    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 864     * some of the POW calculations above, but 16-wide iff statements
 865     * seem to lock c1 hardware, so this is a nasty workaround:
 866     */
 867    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 868    {
 869       if (mask & WRITEMASK_Y)
 870          brw_MOV(p, dst[1], brw_imm_f(0));
 871
 872       if (mask & WRITEMASK_Z)
 873          brw_MOV(p, dst[2], brw_imm_f(0));
 874    }
 875    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 876 }
 877
 878
 879 /* Kill pixel - set execution mask to zero for those pixels which
 880  * fail.
 881  */
 882 static void emit_kil( struct brw_wm_compile *c,
 883                       struct brw_reg *arg0)
 884 {
 885    struct brw_compile *p = &c->func;
 886    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 887    GLuint i;
 888
 889    /* XXX - usually won't need 4 compares!
 890     */
 891    for (i = 0; i < 4; i++) {
 892       brw_push_insn_state(p);
 893       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 894       brw_set_predicate_control_flag_value(p, 0xff);
 895       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 896       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 897       brw_pop_insn_state(p);
 898    }
 899 }
 900
 901
 902 static void fire_fb_write( struct brw_wm_compile *c,
 903                            GLuint base_reg,
 904                            GLuint nr,
 905                            GLuint target,
 906                            GLuint eot )
 907 {
 908    struct brw_compile *p = &c->func;
 909
 910    /* Pass through control information:
 911     */
 912 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 913    {
 914       brw_push_insn_state(p);
 915       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 916       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 917       brw_MOV(p,
 918                brw_message_reg(base_reg + 1),
 919                brw_vec8_grf(1, 0));
 920       brw_pop_insn_state(p);
 921    }
 922
 923    /* Send framebuffer write message: */
 924 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
 925    brw_fb_WRITE(p,
 926                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 927                 base_reg,
 928                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
 929                 target,
 930                 nr,
 931                 0,
 932                 eot);
 933 }
 934
 935
 936 static void emit_aa( struct brw_wm_compile *c,
 937                      struct brw_reg *arg1,
 938                      GLuint reg )
 939 {
 940    struct brw_compile *p = &c->func;
 941    GLuint comp = c->key.aa_dest_stencil_reg / 2;
 942    GLuint off = c->key.aa_dest_stencil_reg % 2;
 943    struct brw_reg aa = offset(arg1[comp], off);
 944
 945    brw_push_insn_state(p);
 946    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
 947    brw_MOV(p, brw_message_reg(reg), aa);
 948    brw_pop_insn_state(p);
 949 }
 950
 951
 952 /* Post-fragment-program processing.  Send the results to the
 953  * framebuffer.
 954  * \param arg0  the fragment color
 955  * \param arg1  the pass-through depth value
 956  * \param arg2  the shader-computed depth value
 957  */
 958 static void emit_fb_write( struct brw_wm_compile *c,
 959                            struct brw_reg *arg0,
 960                            struct brw_reg *arg1,
 961                            struct brw_reg *arg2,
 962                            GLuint target,
 963                            GLuint eot)
 964 {
 965    struct brw_compile *p = &c->func;
 966    GLuint nr = 2;
 967    GLuint channel;
 968
 969    /* Reserve a space for AA - may not be needed:
 970     */
 971    if (c->key.aa_dest_stencil_reg)
 972       nr += 1;
 973
 974    /* I don't really understand how this achieves the color interleave
 975     * (ie RGBARGBA) in the result:  [Do the saturation here]
 976     */
 977    {
 978       brw_push_insn_state(p);
 979
 980       for (channel = 0; channel < 4; channel++) {
 981          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
 982          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
 983
 984          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 985          brw_MOV(p,
 986                  brw_message_reg(nr + channel),
 987                  arg0[channel]);
 988
 989          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 990          brw_MOV(p,
 991                  brw_message_reg(nr + channel + 4),
 992                  sechalf(arg0[channel]));
 993       }
 994
 995       /* skip over the regs populated above:
 996        */
 997       nr += 8;
 998
 999       brw_pop_insn_state(p);
1000    }
1001
1002    if (c->key.source_depth_to_render_target)
1003    {
1004       if (c->key.computes_depth)
1005          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1006       else
1007          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1008
1009       nr += 2;
1010    }
1011
1012    if (c->key.dest_depth_reg)
1013    {
1014       GLuint comp = c->key.dest_depth_reg / 2;
1015       GLuint off = c->key.dest_depth_reg % 2;
1016
1017       if (off != 0) {
1018          brw_push_insn_state(p);
1019          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1020
1021          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1022          /* 2nd half? */
1023          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1024          brw_pop_insn_state(p);
1025       }
1026       else {
1027          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1028       }
1029       nr += 2;
1030    }
1031
1032    if (!c->key.runtime_check_aads_emit) {
1033       if (c->key.aa_dest_stencil_reg)
1034          emit_aa(c, arg1, 2);
1035
1036       fire_fb_write(c, 0, nr, target, eot);
1037    }
1038    else {
1039       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1040       struct brw_reg ip = brw_ip_reg();
1041       struct brw_instruction *jmp;
1042
1043       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1044       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1045       brw_AND(p,
1046               v1_null_ud,
1047               get_element_ud(brw_vec8_grf(1,0), 6),
1048               brw_imm_ud(1<<26));
1049
1050       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1051       {
1052          emit_aa(c, arg1, 2);
1053          fire_fb_write(c, 0, nr, target, eot);
1054          /* note - thread killed in subroutine */
1055       }
1056       brw_land_fwd_jump(p, jmp);
1057
1058       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1059        */
1060       fire_fb_write(c, 1, nr-1, target, eot);
1061    }
1062 }
1063
1064
1065 /**
1066  * Move a GPR to scratch memory.
1067  */
1068 static void emit_spill( struct brw_wm_compile *c,
1069                         struct brw_reg reg,
1070                         GLuint slot )
1071 {
1072    struct brw_compile *p = &c->func;
1073
1074    /*
1075      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1076    */
1077    brw_MOV(p, brw_message_reg(2), reg);
1078
1079    /*
1080      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1081      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1082    */
1083    brw_dp_WRITE_16(p,
1084                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1085                    slot);
1086 }
1087
1088
1089 /**
1090  * Load a GPR from scratch memory.
1091  */
1092 static void emit_unspill( struct brw_wm_compile *c,
1093                           struct brw_reg reg,
1094                           GLuint slot )
1095 {
1096    struct brw_compile *p = &c->func;
1097
1098    /* Slot 0 is the undef value.
1099     */
1100    if (slot == 0) {
1101       brw_MOV(p, reg, brw_imm_f(0));
1102       return;
1103    }
1104
1105    /*
1106      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1107      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1108    */
1109
1110    brw_dp_READ_16(p,
1111                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1112                   slot);
1113 }
1114
1115
1116 /**
1117  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1118  * Args with unspill_reg != 0 will be loaded from scratch memory.
1119  */
1120 static void get_argument_regs( struct brw_wm_compile *c,
1121                                struct brw_wm_ref *arg[],
1122                                struct brw_reg *regs )
1123 {
1124    GLuint i;
1125
1126    for (i = 0; i < 4; i++) {
1127       if (arg[i]) {
1128          if (arg[i]->unspill_reg)
1129             emit_unspill(c,
1130                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1131                          arg[i]->value->spill_slot);
1132
1133          regs[i] = arg[i]->hw_reg;
1134       }
1135       else {
1136          regs[i] = brw_null_reg();
1137       }
1138    }
1139 }
1140
1141
1142 /**
1143  * For values that have a spill_slot!=0, write those regs to scratch memory.
1144  */
1145 static void spill_values( struct brw_wm_compile *c,
1146                           struct brw_wm_value *values,
1147                           GLuint nr )
1148 {
1149    GLuint i;
1150
1151    for (i = 0; i < nr; i++)
1152       if (values[i].spill_slot)
1153          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1154 }
1155
1156
1157 /* Emit the fragment program instructions here.
1158  */
1159 void brw_wm_emit( struct brw_wm_compile *c )
1160 {
1161    struct brw_compile *p = &c->func;
1162    GLuint insn;
1163
1164    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1165
1166    /* Check if any of the payload regs need to be spilled:
1167     */
1168    spill_values(c, c->payload.depth, 4);
1169    spill_values(c, c->creg, c->nr_creg);
1170    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1171
1172
1173    for (insn = 0; insn < c->nr_insns; insn++) {
1174
1175       struct brw_wm_instruction *inst = &c->instruction[insn];
1176       struct brw_reg args[3][4], dst[4];
1177       GLuint i, dst_flags;
1178
1179       /* Get argument regs:
1180        */
1181       for (i = 0; i < 3; i++)
1182          get_argument_regs(c, inst->src[i], args[i]);
1183
1184       /* Get dest regs:
1185        */
1186       for (i = 0; i < 4; i++)
1187          if (inst->dst[i])
1188             dst[i] = inst->dst[i]->hw_reg;
1189          else
1190             dst[i] = brw_null_reg();
1191
1192       /* Flags
1193        */
1194       dst_flags = inst->writemask;
1195       if (inst->saturate)
1196          dst_flags |= SATURATE;
1197
1198       switch (inst->opcode) {
1199          /* Generated instructions for calculating triangle interpolants:
1200           */
1201       case WM_PIXELXY:
1202          emit_pixel_xy(p, dst, dst_flags);
1203          break;
1204
1205       case WM_DELTAXY:
1206          emit_delta_xy(p, dst, dst_flags, args[0]);
1207          break;
1208
1209       case WM_WPOSXY:
1210          emit_wpos_xy(c, dst, dst_flags, args[0]);
1211          break;
1212
1213       case WM_PIXELW:
1214          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1215          break;
1216
1217       case WM_LINTERP:
1218          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1219          break;
1220
1221       case WM_PINTERP:
1222          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1223          break;
1224
1225       case WM_CINTERP:
1226          emit_cinterp(p, dst, dst_flags, args[0]);
1227          break;
1228
1229       case WM_FB_WRITE:
1230          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1231          break;
1232
1233       case WM_FRONTFACING:
1234          emit_frontfacing(p, dst, dst_flags);
1235          break;
1236
1237          /* Straightforward arithmetic:
1238           */
1239       case OPCODE_ADD:
1240          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1241          break;
1242
1243       case OPCODE_FRC:
1244          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1245          break;
1246
1247       case OPCODE_FLR:
1248          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1249          break;
1250
1251       case OPCODE_DP3:
1252          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1253          break;
1254
1255       case OPCODE_DP4:
1256          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1257          break;
1258
1259       case OPCODE_DPH:
1260          emit_dph(p, dst, dst_flags, args[0], args[1]);
1261          break;
1262
1263       case OPCODE_TRUNC:
1264          emit_trunc(p, dst, dst_flags, args[0]);
1265          break;
1266
1267       case OPCODE_LRP:
1268          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1269          break;
1270
1271       case OPCODE_MAD:
1272          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1273          break;
1274
1275       case OPCODE_MOV:
1276       case OPCODE_SWZ:
1277          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1278          break;
1279
1280       case OPCODE_MUL:
1281          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1282          break;
1283
1284       case OPCODE_XPD:
1285          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1286          break;
1287
1288          /* Higher math functions:
1289           */
1290       case OPCODE_RCP:
1291          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1292          break;
1293
1294       case OPCODE_RSQ:
1295          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1296          break;
1297
1298       case OPCODE_SIN:
1299          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1300          break;
1301
1302       case OPCODE_COS:
1303          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1304          break;
1305
1306       case OPCODE_EX2:
1307          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1308          break;
1309
1310       case OPCODE_LG2:
1311          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1312          break;
1313
1314       case OPCODE_SCS:
1315          /* There is an scs math function, but it would need some
1316           * fixup for 16-element execution.
1317           */
1318          if (dst_flags & WRITEMASK_X)
1319             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1320          if (dst_flags & WRITEMASK_Y)
1321             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1322          break;
1323
1324       case OPCODE_POW:
1325          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1326          break;
1327
1328          /* Comparisons:
1329           */
1330       case OPCODE_CMP:
1331          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1332          break;
1333
1334       case OPCODE_MAX:
1335          emit_max(p, dst, dst_flags, args[0], args[1]);
1336          break;
1337
1338       case OPCODE_MIN:
1339          emit_min(p, dst, dst_flags, args[0], args[1]);
1340          break;
1341
1342       case OPCODE_SLT:
1343          emit_slt(p, dst, dst_flags, args[0], args[1]);
1344          break;
1345
1346       case OPCODE_SLE:
1347          emit_sle(p, dst, dst_flags, args[0], args[1]);
1348         break;
1349       case OPCODE_SGT:
1350          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1351         break;
1352       case OPCODE_SGE:
1353          emit_sge(p, dst, dst_flags, args[0], args[1]);
1354          break;
1355       case OPCODE_SEQ:
1356          emit_seq(p, dst, dst_flags, args[0], args[1]);
1357         break;
1358       case OPCODE_SNE:
1359          emit_sne(p, dst, dst_flags, args[0], args[1]);
1360         break;
1361
1362       case OPCODE_LIT:
1363          emit_lit(p, dst, dst_flags, args[0]);
1364          break;
1365
1366          /* Texturing operations:
1367           */
1368       case OPCODE_TEX:
1369          emit_tex(c, inst, dst, dst_flags, args[0]);
1370          break;
1371
1372       case OPCODE_TXB:
1373          emit_txb(c, inst, dst, dst_flags, args[0]);
1374          break;
1375
1376       case OPCODE_KIL:
1377          emit_kil(c, args[0]);
1378          break;
1379
1380       default:
1381          _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1382                       inst->opcode, inst->opcode < MAX_OPCODE ?
1383                                     _mesa_opcode_string(inst->opcode) :
1384                                     "unknown");
1385       }
1386
1387       for (i = 0; i < 4; i++)
1388         if (inst->dst[i] && inst->dst[i]->spill_slot)
1389            emit_spill(c,
1390                       inst->dst[i]->hw_reg,
1391                       inst->dst[i]->spill_slot);
1392    }
1393
1394    if (INTEL_DEBUG & DEBUG_WM) {
1395       int i;
1396
1397       _mesa_printf("wm-native:\n");
1398       for (i = 0; i < p->nr_insn; i++)
1399          brw_disasm(stderr, &p->store[i]);
1400       _mesa_printf("\n");
1401    }
1402 }