2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
/* emit_pixel_xy: with compression temporarily set to NONE, produces the
 * per-pixel X (dst[0]) and/or Y (dst[1]) values, as selected by
 * WRITEMASK_X / WRITEMASK_Y in mask.  Each result is written 16 wide as
 * UW and is built from the packed tile coordinates in r1 (UW sub-offsets
 * 4 and 5) and a packed immediate vector.  Compressed mode is re-enabled
 * before returning.
 * NOTE(review): the line naming the emitting opcode is not visible in
 * this view; the existing comment implies an add - confirm.
 */
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
70 struct brw_reg r1 = brw_vec1_grf(1, 0);
71 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75 /* Calculate pixel centers by adding 1 or 0 to each of the
76 * micro-tile coordinates passed in r1.
78 if (mask & WRITEMASK_X) {
80 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
81 stride(suboffset(r1_uw, 4), 2, 4, 0),
82 brw_imm_v(0x10101010));
85 if (mask & WRITEMASK_Y) {
87 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
88 stride(suboffset(r1_uw,5), 2, 4, 0),
89 brw_imm_v(0x11001100));
92 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
/* emit_delta_xy: for the X and/or Y channel selected by mask, combines
 * the pixel position in arg0 (retyped to UW) with a negated value taken
 * from r1 (sub-offset 1 for Y), i.e. pixel position minus origin.
 * NOTE(review): the opcode, the destination operand and the X-channel
 * source are on lines not visible in this view - confirm.
 */
97 static void emit_delta_xy(struct brw_compile *p,
98 const struct brw_reg *dst,
100 const struct brw_reg *arg0)
102 struct brw_reg r1 = brw_vec1_grf(1, 0);
104 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 if (mask & WRITEMASK_X) {
110 retype(arg0[0], BRW_REGISTER_TYPE_UW),
114 if (mask & WRITEMASK_Y) {
117 retype(arg0[1], BRW_REGISTER_TYPE_UW),
118 negate(suboffset(r1,1)));
/* emit_wpos_xy: converts the pixel coordinates in arg0 (retyped to W)
 * into window-relative positions for the channels selected by mask.
 * X is paired with the immediate (0 - origin_x); Y negates the incoming
 * coordinate and pairs it with (origin_y + drawable_height - 1).
 * NOTE(review): the opcode and destination operands are on lines not
 * visible in this view - presumably an ADD into dst[0] / dst[1].
 */
123 static void emit_wpos_xy(struct brw_wm_compile *c,
124 const struct brw_reg *dst,
126 const struct brw_reg *arg0)
128 struct brw_compile *p = &c->func;
130 /* Calculate the pixel offset from window bottom left into destination
133 if (mask & WRITEMASK_X) {
134 /* X' = X - origin */
137 retype(arg0[0], BRW_REGISTER_TYPE_W),
138 brw_imm_d(0 - c->key.origin_x));
141 if (mask & WRITEMASK_Y) {
142 /* Y' = (origin_y + height - 1) - Y; the -1 matches the immediate below */
145 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
146 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
/* emit_pixel_w: when WRITEMASK_W is set, interpolates 1/w with a
 * LINE + MAC pair (interpolant taken from GRF arg0[0].nr+1, sub-register
 * 4, against deltas[0] and deltas[1]) directly into message register 2,
 * then issues a full-precision, non-saturating 16-wide INV math
 * operation whose destination is dst[3].
 */
151 static void emit_pixel_w( struct brw_compile *p,
152 const struct brw_reg *dst,
154 const struct brw_reg *arg0,
155 const struct brw_reg *deltas)
157 /* Don't need this if all you are doing is interpolating color, for
160 if (mask & WRITEMASK_W) {
161 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
163 /* Calc 1/w - just linterp wpos[3] optimized by putting the
164 * result straight into a message reg.
166 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
167 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
170 brw_math_16( p, dst[3],
171 BRW_MATH_FUNCTION_INV,
172 BRW_MATH_SATURATE_NONE,
174 BRW_MATH_PRECISION_FULL);
/* emit_linterp: linear interpolation of an attribute.  The four
 * per-channel interpolants live in two consecutive GRFs starting at
 * arg0[0].nr (sub-registers 0 and 4 of each).  For each channel a LINE
 * against deltas[0] is followed by a MAC against deltas[1] that writes
 * dst[i].
 * NOTE(review): the per-channel writemask test is presumably on a line
 * not visible in this view - confirm.
 */
180 static void emit_linterp( struct brw_compile *p,
181 const struct brw_reg *dst,
183 const struct brw_reg *arg0,
184 const struct brw_reg *deltas )
186 struct brw_reg interp[4];
187 GLuint nr = arg0[0].nr;
190 interp[0] = brw_vec1_grf(nr, 0);
191 interp[1] = brw_vec1_grf(nr, 4);
192 interp[2] = brw_vec1_grf(nr+1, 0);
193 interp[3] = brw_vec1_grf(nr+1, 4);
195 for (i = 0; i < 4; i++) {
197 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
198 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
/* emit_pinterp: perspective-style interpolation.  Performs the same
 * LINE + MAC sequence as the linear case (interpolants in two
 * consecutive GRFs starting at arg0[0].nr), then multiplies each dst[i]
 * by w[3] in a second pass.
 * NOTE(review): the per-channel writemask tests are presumably on lines
 * not visible in this view - confirm.
 */
204 static void emit_pinterp( struct brw_compile *p,
205 const struct brw_reg *dst,
207 const struct brw_reg *arg0,
208 const struct brw_reg *deltas,
209 const struct brw_reg *w)
211 struct brw_reg interp[4];
212 GLuint nr = arg0[0].nr;
215 interp[0] = brw_vec1_grf(nr, 0);
216 interp[1] = brw_vec1_grf(nr, 4);
217 interp[2] = brw_vec1_grf(nr+1, 0);
218 interp[3] = brw_vec1_grf(nr+1, 4);
220 for (i = 0; i < 4; i++) {
222 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
223 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
226 for (i = 0; i < 4; i++) {
228 brw_MUL(p, dst[i], dst[i], w[3]);
/* emit_cinterp: constant (non-interpolated) attribute fetch.  For each
 * channel, copies sub-register 3 of that channel's interpolant (located
 * in two consecutive GRFs starting at arg0[0].nr) into dst[i] with a
 * plain MOV.
 * NOTE(review): the per-channel writemask test is presumably on a line
 * not visible in this view - confirm.
 */
234 static void emit_cinterp( struct brw_compile *p,
235 const struct brw_reg *dst,
237 const struct brw_reg *arg0 )
239 struct brw_reg interp[4];
240 GLuint nr = arg0[0].nr;
243 interp[0] = brw_vec1_grf(nr, 0);
244 interp[1] = brw_vec1_grf(nr, 4);
245 interp[2] = brw_vec1_grf(nr+1, 0);
246 interp[3] = brw_vec1_grf(nr+1, 4);
248 for (i = 0; i < 4; i++) {
250 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
255 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
256 static void emit_frontfacing( struct brw_compile *p,
257 const struct brw_reg *dst,
260 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
263 if (!(mask & WRITEMASK_XYZW))
266 for (i = 0; i < 4; i++) {
268 brw_MOV(p, dst[i], brw_imm_f(0.0));
272 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
275 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
276 for (i = 0; i < 4; i++) {
278 brw_MOV(p, dst[i], brw_imm_f(1.0));
281 brw_set_predicate_control_flag_value(p, 0xff);
/* emit_alu1: applies the one-source emitter func to each channel,
 * func(p, dst[i], arg0[i]).  Saturation is switched on before the loop
 * and off after it.
 * NOTE(review): the conditions guarding the saturate calls and the
 * per-channel writemask test are on lines not visible in this view -
 * presumably (mask & SATURATE) and (mask & (1<<i)).
 */
284 static void emit_alu1( struct brw_compile *p,
285 struct brw_instruction *(*func)(struct brw_compile *,
288 const struct brw_reg *dst,
290 const struct brw_reg *arg0 )
295 brw_set_saturate(p, 1);
297 for (i = 0; i < 4; i++) {
299 func(p, dst[i], arg0[i]);
304 brw_set_saturate(p, 0);
/* emit_alu2: applies the two-source emitter func to each channel,
 * func(p, dst[i], arg0[i], arg1[i]).  Saturation is switched on before
 * the loop and off after it.
 * NOTE(review): the conditions guarding the saturate calls and the
 * per-channel writemask test are on lines not visible in this view -
 * presumably (mask & SATURATE) and (mask & (1<<i)).
 */
308 static void emit_alu2( struct brw_compile *p,
309 struct brw_instruction *(*func)(struct brw_compile *,
313 const struct brw_reg *dst,
315 const struct brw_reg *arg0,
316 const struct brw_reg *arg1 )
321 brw_set_saturate(p, 1);
323 for (i = 0; i < 4; i++) {
325 func(p, dst[i], arg0[i], arg1[i]);
330 brw_set_saturate(p, 0);
/* emit_mad: multiply-add, dst = arg0 * arg1 + arg2, per channel.
 * Emitted as a MUL into dst[i] followed by an ADD of arg2[i]; saturation
 * is enabled only around the ADD, and only when SATURATE is set in mask.
 * NOTE(review): the per-channel writemask test is presumably on a line
 * not visible in this view - confirm.
 */
334 static void emit_mad( struct brw_compile *p,
335 const struct brw_reg *dst,
337 const struct brw_reg *arg0,
338 const struct brw_reg *arg1,
339 const struct brw_reg *arg2 )
343 for (i = 0; i < 4; i++) {
345 brw_MUL(p, dst[i], arg0[i], arg1[i]);
347 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
348 brw_ADD(p, dst[i], dst[i], arg2[i]);
349 brw_set_saturate(p, 0);
/* emit_trunc: emits brw_RNDZ (round toward zero) from arg0[i] into
 * dst[i] for each channel.
 * NOTE(review): the per-channel writemask test is presumably on a line
 * not visible in this view - confirm.
 */
354 static void emit_trunc( struct brw_compile *p,
355 const struct brw_reg *dst,
357 const struct brw_reg *arg0)
361 for (i = 0; i < 4; i++) {
363 brw_RNDZ(p, dst[i], arg0[i]);
/* emit_lrp: linear blend per channel.  dst[i] is first used as a
 * temporary holding (1.0 - arg0[i]); that is multiplied by arg2[i] into
 * the null register, and a MAC of arg0[i] * arg1[i] then writes dst[i],
 * saturated when SATURATE is set in mask.
 * NOTE(review): this relies on the MUL-to-null result being carried into
 * the following MAC (presumably via the accumulator) - confirm.  The
 * per-channel writemask test is not visible in this view.
 */
368 static void emit_lrp( struct brw_compile *p,
369 const struct brw_reg *dst,
371 const struct brw_reg *arg0,
372 const struct brw_reg *arg1,
373 const struct brw_reg *arg2 )
377 /* Uses dst as a temporary:
379 for (i = 0; i < 4; i++) {
381 /* Can I use the LINE instruction for this?
383 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
384 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
386 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
387 brw_MAC(p, dst[i], arg0[i], arg1[i]);
388 brw_set_saturate(p, 0);
/* emit_sop: shared helper for the set-on-condition opcodes.  Per
 * channel: writes 0.0 to dst[i], compares arg0[i] with arg1[i] using
 * cond, writes 1.0 to dst[i], then resets the predicate flag value to
 * 0xff.
 * NOTE(review): the second MOV is presumably predicated on the compare
 * result by the emitter; the mask/cond parameter lines and the
 * per-channel writemask test are not visible in this view.
 */
393 static void emit_sop( struct brw_compile *p,
394 const struct brw_reg *dst,
397 const struct brw_reg *arg0,
398 const struct brw_reg *arg1 )
402 for (i = 0; i < 4; i++) {
404 brw_MOV(p, dst[i], brw_imm_f(0));
405 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
406 brw_MOV(p, dst[i], brw_imm_f(1.0));
407 brw_set_predicate_control_flag_value(p, 0xff);
/* emit_slt: set-on-less-than; forwards to emit_sop with
 * BRW_CONDITIONAL_L.
 */
412 static void emit_slt( struct brw_compile *p,
413 const struct brw_reg *dst,
415 const struct brw_reg *arg0,
416 const struct brw_reg *arg1 )
418 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
/* emit_sle: set-on-less-or-equal; forwards to emit_sop with
 * BRW_CONDITIONAL_LE.
 */
421 static void emit_sle( struct brw_compile *p,
422 const struct brw_reg *dst,
424 const struct brw_reg *arg0,
425 const struct brw_reg *arg1 )
427 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
/* emit_sgt: set-on-greater-than; forwards to emit_sop with
 * BRW_CONDITIONAL_G.
 */
430 static void emit_sgt( struct brw_compile *p,
431 const struct brw_reg *dst,
433 const struct brw_reg *arg0,
434 const struct brw_reg *arg1 )
436 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
/* emit_sge: set-on-greater-or-equal; forwards to emit_sop with
 * BRW_CONDITIONAL_GE.
 */
439 static void emit_sge( struct brw_compile *p,
440 const struct brw_reg *dst,
442 const struct brw_reg *arg0,
443 const struct brw_reg *arg1 )
445 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
/* emit_seq: set-on-equal; forwards to emit_sop with
 * BRW_CONDITIONAL_EQ.
 */
448 static void emit_seq( struct brw_compile *p,
449 const struct brw_reg *dst,
451 const struct brw_reg *arg0,
452 const struct brw_reg *arg1 )
454 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
/* emit_sne: set-on-not-equal; forwards to emit_sop with
 * BRW_CONDITIONAL_NEQ.
 */
457 static void emit_sne( struct brw_compile *p,
458 const struct brw_reg *dst,
460 const struct brw_reg *arg0,
461 const struct brw_reg *arg1 )
463 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
/* emit_cmp: per-channel select on the sign of arg0.  dst[i] is first
 * loaded with arg2[i]; arg0[i] is then compared "less than 0" and a
 * second MOV loads arg1[i].  Both MOVs honour SATURATE in mask, and the
 * predicate flag value is reset to 0xff after each channel.
 * NOTE(review): the second MOV is presumably predicated on the compare
 * result; the per-channel writemask test is not visible in this view.
 */
466 static void emit_cmp( struct brw_compile *p,
467 const struct brw_reg *dst,
469 const struct brw_reg *arg0,
470 const struct brw_reg *arg1,
471 const struct brw_reg *arg2 )
475 for (i = 0; i < 4; i++) {
477 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
478 brw_MOV(p, dst[i], arg2[i]);
479 brw_set_saturate(p, 0);
481 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
483 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
484 brw_MOV(p, dst[i], arg1[i]);
485 brw_set_saturate(p, 0);
486 brw_set_predicate_control_flag_value(p, 0xff);
/* emit_max: per-channel maximum.  dst[i] is loaded with arg0[i]; after
 * comparing arg0[i] < arg1[i], a second MOV loads arg1[i].  Both MOVs
 * honour SATURATE in mask; the predicate flag value is reset to 0xff
 * after each channel.
 * NOTE(review): the second MOV is presumably predicated on the compare
 * result; the per-channel writemask test is not visible in this view.
 */
491 static void emit_max( struct brw_compile *p,
492 const struct brw_reg *dst,
494 const struct brw_reg *arg0,
495 const struct brw_reg *arg1 )
499 for (i = 0; i < 4; i++) {
501 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
502 brw_MOV(p, dst[i], arg0[i]);
503 brw_set_saturate(p, 0);
505 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
507 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
508 brw_MOV(p, dst[i], arg1[i]);
509 brw_set_saturate(p, 0);
510 brw_set_predicate_control_flag_value(p, 0xff);
/* emit_min: per-channel minimum.  dst[i] is loaded with arg1[i]; after
 * comparing arg0[i] < arg1[i], a second MOV loads arg0[i].  Both MOVs
 * honour SATURATE in mask; the predicate flag value is reset to 0xff
 * after each channel.
 * NOTE(review): the second MOV is presumably predicated on the compare
 * result; the per-channel writemask test is not visible in this view.
 */
515 static void emit_min( struct brw_compile *p,
516 const struct brw_reg *dst,
518 const struct brw_reg *arg0,
519 const struct brw_reg *arg1 )
523 for (i = 0; i < 4; i++) {
525 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
526 brw_MOV(p, dst[i], arg1[i]);
527 brw_set_saturate(p, 0);
529 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
531 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
532 brw_MOV(p, dst[i], arg0[i]);
533 brw_set_saturate(p, 0);
534 brw_set_predicate_control_flag_value(p, 0xff);
/* emit_dp3: three-component dot product of arg0 and arg1.  The result
 * goes to the single destination channel selected by mask (index found
 * with _mesa_ffs); nothing is emitted when no XYZW bit is set, and the
 * assert requires exactly one.  Emitted as MUL, MAC into the null
 * register, then a final MAC into dst[dst_chan] that honours SATURATE.
 */
540 static void emit_dp3( struct brw_compile *p,
541 const struct brw_reg *dst,
543 const struct brw_reg *arg0,
544 const struct brw_reg *arg1 )
546 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
548 if (!(mask & WRITEMASK_XYZW))
549 return; /* Do not emit dead code */
551 assert(is_power_of_two(mask & WRITEMASK_XYZW));
553 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
554 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
556 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
557 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
558 brw_set_saturate(p, 0);
/* emit_dp4: four-component dot product of arg0 and arg1.  The result
 * goes to the single destination channel selected by mask (index found
 * with _mesa_ffs); nothing is emitted when no XYZW bit is set, and the
 * assert requires exactly one.  Emitted as MUL plus two MACs into the
 * null register, then a final MAC into dst[dst_chan] that honours
 * SATURATE.
 */
562 static void emit_dp4( struct brw_compile *p,
563 const struct brw_reg *dst,
565 const struct brw_reg *arg0,
566 const struct brw_reg *arg1 )
568 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
570 if (!(mask & WRITEMASK_XYZW))
571 return; /* Do not emit dead code */
573 assert(is_power_of_two(mask & WRITEMASK_XYZW));
575 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
576 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
577 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
579 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
580 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
581 brw_set_saturate(p, 0);
/* emit_dph: homogeneous dot product, arg0.xyz . arg1.xyz + arg1.w.
 * Only the X destination channel is supported (asserted); nothing is
 * emitted when no XYZW bit is set.  The three-term sum is accumulated
 * into dst[0] with MUL/MAC/MAC, then arg1[3] is added with an ADD that
 * honours SATURATE.
 */
585 static void emit_dph( struct brw_compile *p,
586 const struct brw_reg *dst,
588 const struct brw_reg *arg0,
589 const struct brw_reg *arg1 )
591 if (!(mask & WRITEMASK_XYZW))
592 return; /* Do not emit dead code */
594 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
596 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
597 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
598 brw_MAC(p, dst[0], arg0[2], arg1[2]);
600 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
601 brw_ADD(p, dst[0], dst[0], arg1[3]);
602 brw_set_saturate(p, 0);
606 static void emit_xpd( struct brw_compile *p,
607 const struct brw_reg *dst,
609 const struct brw_reg *arg0,
610 const struct brw_reg *arg1 )
614 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
616 for (i = 0 ; i < 3; i++) {
621 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
623 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
624 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
625 brw_set_saturate(p, 0);
/* emit_math1: one-operand math operation.  The result goes to the single
 * destination channel selected by mask (asserted to be exactly one;
 * nothing is emitted when none is set).  arg0[0] is copied into message
 * register 2 and the math operation is then issued at full precision,
 * with the saturate mode derived from SATURATE in mask.
 * NOTE(review): the math call itself and its remaining arguments are on
 * lines not visible in this view - confirm.
 */
631 static void emit_math1( struct brw_compile *p,
633 const struct brw_reg *dst,
635 const struct brw_reg *arg0 )
637 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
639 if (!(mask & WRITEMASK_XYZW))
640 return; /* Do not emit dead code */
642 assert(is_power_of_two(mask & WRITEMASK_XYZW));
644 brw_MOV(p, brw_message_reg(2), arg0[0]);
646 /* Send two messages to perform all 16 operations:
651 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
654 BRW_MATH_PRECISION_FULL);
/* emit_math2: two-operand math operation.  The result goes to the single
 * destination channel selected by mask (asserted to be exactly one;
 * nothing is emitted when none is set).  Inside a pushed instruction
 * state, the two halves of arg0[0] are copied to message registers 2 and
 * 4 and the two halves of arg1[0] to 3 and 5, switching between
 * COMPRESSION_NONE and COMPRESSION_2NDHALF.  Two math operations are then
 * issued, one per half, the second targeting offset(dst[dst_chan],1).
 * Both use vector data, full precision and a saturate mode derived from
 * SATURATE in mask.  The instruction state is popped at the end.
 * NOTE(review): the math call heads and some of their arguments are on
 * lines not visible in this view.
 */
658 static void emit_math2( struct brw_compile *p,
660 const struct brw_reg *dst,
662 const struct brw_reg *arg0,
663 const struct brw_reg *arg1)
665 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
667 if (!(mask & WRITEMASK_XYZW))
668 return; /* Do not emit dead code */
670 assert(is_power_of_two(mask & WRITEMASK_XYZW));
672 brw_push_insn_state(p);
674 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
675 brw_MOV(p, brw_message_reg(2), arg0[0]);
676 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
677 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
679 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
680 brw_MOV(p, brw_message_reg(3), arg1[0]);
681 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
682 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
685 /* Send two messages to perform all 16 operations:
687 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
691 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
694 BRW_MATH_DATA_VECTOR,
695 BRW_MATH_PRECISION_FULL);
697 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
699 offset(dst[dst_chan],1),
701 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
704 BRW_MATH_DATA_VECTOR,
705 BRW_MATH_PRECISION_FULL);
707 brw_pop_insn_state(p);
/* emit_tex: emits a SIMD16 texture sample.  The texture target
 * (inst->tex_idx) is switched on to decide how many coordinate registers
 * are sent; coordinates are copied from arg[] through the swizzle table
 * {0,1,2,2}, or a 0.0 immediate is written, into successive message
 * registers.  The response length is always 8.  The message type is the
 * plain or shadow-compare SIMD16 sample, using the IGDNG variants when
 * BRW_IS_IGDNG(p->brw) holds.  The surface index and sampler both come
 * from inst->tex_unit.
 * NOTE(review): many lines (case bodies, msgLength updates, the sample
 * call head and several of its arguments) are not visible in this view;
 * treat the above as a summary of the visible lines only.
 */
712 static void emit_tex( struct brw_wm_compile *c,
713 const struct brw_wm_instruction *inst,
716 struct brw_reg *arg )
718 struct brw_compile *p = &c->func;
719 GLuint msgLength, responseLength;
724 /* How many input regs are there?
726 switch (inst->tex_idx) {
727 case TEXTURE_1D_INDEX:
731 case TEXTURE_2D_INDEX:
732 case TEXTURE_RECT_INDEX:
737 emit = WRITEMASK_XYZ;
742 if (inst->tex_shadow) {
749 for (i = 0; i < nr; i++) {
750 static const GLuint swz[4] = {0,1,2,2};
752 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
754 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
758 responseLength = 8; /* always */
760 if (BRW_IS_IGDNG(p->brw)) {
761 if (inst->tex_shadow)
762 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
764 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
766 if (inst->tex_shadow)
767 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
769 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
773 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
775 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
776 SURF_INDEX_TEXTURE(inst->tex_unit),
777 inst->tex_unit, /* sampler */
784 BRW_SAMPLER_SIMD_MODE_SIMD16);
/* emit_txb: emits a SIMD16 texture sample with bias.  Coordinates are
 * loaded into message registers 2, 4 and 6: one coordinate for 1D, two
 * for 2D/RECT, three otherwise, with unused slots set to 0.0.  arg[3] is
 * loaded into message register 8 (presumably the bias - confirm).  The
 * message type is the SIMD16 sample-with-bias, using the IGDNG variant
 * when BRW_IS_IGDNG(p->brw) holds; the response length is 8.  Shadow
 * comparison is ignored, per the existing comment.
 * NOTE(review): the sample call head, break/default lines and several
 * arguments are not visible in this view.
 */
788 static void emit_txb( struct brw_wm_compile *c,
789 const struct brw_wm_instruction *inst,
792 struct brw_reg *arg )
794 struct brw_compile *p = &c->func;
797 /* Shadow ignored for txb.
799 switch (inst->tex_idx) {
800 case TEXTURE_1D_INDEX:
801 brw_MOV(p, brw_message_reg(2), arg[0]);
802 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
803 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
805 case TEXTURE_2D_INDEX:
806 case TEXTURE_RECT_INDEX:
807 brw_MOV(p, brw_message_reg(2), arg[0]);
808 brw_MOV(p, brw_message_reg(4), arg[1]);
809 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
812 brw_MOV(p, brw_message_reg(2), arg[0]);
813 brw_MOV(p, brw_message_reg(4), arg[1]);
814 brw_MOV(p, brw_message_reg(6), arg[2]);
818 brw_MOV(p, brw_message_reg(8), arg[3]);
821 if (BRW_IS_IGDNG(p->brw))
822 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
824 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
827 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
829 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
830 SURF_INDEX_TEXTURE(inst->tex_unit),
831 inst->tex_unit, /* sampler */
834 8, /* responseLength */
838 BRW_SAMPLER_SIMD_MODE_SIMD16);
/* emit_lit: emits the Y and Z results of the LIT lighting-coefficient
 * opcode; X and W must not be in mask (asserted).  Y: dst[1] = arg0[0],
 * honouring SATURATE.  Z: computed through emit_math2 with the POW
 * function.  Afterwards arg0[0] is compared "less or equal 0" and 0.0 is
 * moved into dst[1] / dst[2] for the requested channels; predicate
 * control is finally reset to BRW_PREDICATE_NONE.
 * NOTE(review): the zeroing MOVs are presumably predicated on the
 * compare result; some emit_math2 arguments are on lines not visible in
 * this view.
 */
842 static void emit_lit( struct brw_compile *p,
843 const struct brw_reg *dst,
845 const struct brw_reg *arg0 )
847 assert((mask & WRITEMASK_XW) == 0);
849 if (mask & WRITEMASK_Y) {
850 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
851 brw_MOV(p, dst[1], arg0[0]);
852 brw_set_saturate(p, 0);
855 if (mask & WRITEMASK_Z) {
856 emit_math2(p, BRW_MATH_FUNCTION_POW,
858 WRITEMASK_X | (mask & SATURATE),
863 /* Ordinarily you'd use an iff statement to skip or shortcircuit
864 * some of the POW calculations above, but 16-wide iff statements
865 * seem to lock c1 hardware, so this is a nasty workaround:
867 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
869 if (mask & WRITEMASK_Y)
870 brw_MOV(p, dst[1], brw_imm_f(0));
872 if (mask & WRITEMASK_Z)
873 brw_MOV(p, dst[2], brw_imm_f(0));
875 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
879 /* Kill pixel - set execution mask to zero for those pixels which
/* emit_kil: for each of the four channels of arg0, compares the channel
 * "greater or equal 0", resets the predicate flag value to 0xff, and,
 * uncompressed, ANDs the flag register into r0.0 viewed as UW.  Each
 * iteration runs inside its own pushed/popped instruction state.
 * NOTE(review): presumably this clears the pixel-mask bits of pixels
 * whose compare failed - confirm against the payload layout.
 */
882 static void emit_kil( struct brw_wm_compile *c,
883 struct brw_reg *arg0)
885 struct brw_compile *p = &c->func;
886 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
889 /* XXX - usually won't need 4 compares!
891 for (i = 0; i < 4; i++) {
892 brw_push_insn_state(p);
893 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
894 brw_set_predicate_control_flag_value(p, 0xff);
895 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
896 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
897 brw_pop_insn_state(p);
/* fire_fb_write: finishes and sends a framebuffer write message.  With
 * mask control disabled and compression off (inside a pushed/popped
 * instruction state), control information is written to message register
 * base_reg + 1; per the existing comment this is a copy of r1.  The
 * framebuffer write is then sent with a 16-wide null UW destination and
 * r0 (UW) as a source.
 * NOTE(review): the parameter list, the MOV head and most send arguments
 * are on lines not visible in this view.
 */
902 static void fire_fb_write( struct brw_wm_compile *c,
908 struct brw_compile *p = &c->func;
910 /* Pass through control information:
912 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
914 brw_push_insn_state(p);
915 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
916 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
918 brw_message_reg(base_reg + 1),
920 brw_pop_insn_state(p);
923 /* Send framebuffer write message: */
924 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
926 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
928 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
/* emit_aa: copies one register into message register reg.  The source
 * is located from c->key.aa_dest_stencil_reg: component index = value / 2
 * within arg1, register offset = value % 2.  The MOV is emitted
 * uncompressed inside a pushed/popped instruction state.
 * NOTE(review): the declaration of the reg parameter is on a line not
 * visible in this view.
 */
936 static void emit_aa( struct brw_wm_compile *c,
937 struct brw_reg *arg1,
940 struct brw_compile *p = &c->func;
941 GLuint comp = c->key.aa_dest_stencil_reg / 2;
942 GLuint off = c->key.aa_dest_stencil_reg % 2;
943 struct brw_reg aa = offset(arg1[comp], off);
945 brw_push_insn_state(p);
946 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
947 brw_MOV(p, brw_message_reg(reg), aa);
948 brw_pop_insn_state(p);
952 /* Post-fragment-program processing. Send the results to the
954 * \param arg0 the fragment color
955 * \param arg1 the pass-through depth value
956 * \param arg2 the shader-computed depth value
/* emit_fb_write: assembles the framebuffer write message and sends it.
 * Visible steps, in order:
 *  - a slot is reserved when c->key.aa_dest_stencil_reg is set;
 *  - the four colour channels of arg0 are copied into message registers
 *    nr + channel (first half, uncompressed) and nr + channel + 4
 *    (second half);
 *  - when c->key.source_depth_to_render_target is set, depth is appended
 *    from arg2[2] or arg1[1] depending on c->key.computes_depth;
 *  - when c->key.dest_depth_reg is set, destination depth is appended
 *    from arg1[comp] (comp = dest_depth_reg / 2);
 *  - if c->key.runtime_check_aads_emit is clear, emit_aa is called when
 *    needed and fire_fb_write(c, 0, nr, ...) sends the message;
 *    otherwise a conditional JMPI selects at run time between the AA
 *    path and fire_fb_write(c, 1, nr-1, ...), which shifts up one
 *    register to fill the AA hole.
 * NOTE(review): updates to nr, several MOV heads and branch lines are
 * not visible in this view; confirm the register accounting there.
 */
958 static void emit_fb_write( struct brw_wm_compile *c,
959 struct brw_reg *arg0,
960 struct brw_reg *arg1,
961 struct brw_reg *arg2,
965 struct brw_compile *p = &c->func;
969 /* Reserve a space for AA - may not be needed:
971 if (c->key.aa_dest_stencil_reg)
974 /* I don't really understand how this achieves the color interleave
975 * (ie RGBARGBA) in the result: [Do the saturation here]
978 brw_push_insn_state(p);
980 for (channel = 0; channel < 4; channel++) {
981 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
982 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
984 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
986 brw_message_reg(nr + channel),
989 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
991 brw_message_reg(nr + channel + 4),
992 sechalf(arg0[channel]));
995 /* skip over the regs populated above:
999 brw_pop_insn_state(p);
1002 if (c->key.source_depth_to_render_target)
1004 if (c->key.computes_depth)
1005 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1007 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1012 if (c->key.dest_depth_reg)
1014 GLuint comp = c->key.dest_depth_reg / 2;
1015 GLuint off = c->key.dest_depth_reg % 2;
1018 brw_push_insn_state(p);
1019 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1021 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1023 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1024 brw_pop_insn_state(p);
1027 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1032 if (!c->key.runtime_check_aads_emit) {
1033 if (c->key.aa_dest_stencil_reg)
1034 emit_aa(c, arg1, 2);
1036 fire_fb_write(c, 0, nr, target, eot);
1039 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1040 struct brw_reg ip = brw_ip_reg();
1041 struct brw_instruction *jmp;
1043 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1044 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1047 get_element_ud(brw_vec8_grf(1,0), 6),
1050 jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1052 emit_aa(c, arg1, 2);
1053 fire_fb_write(c, 0, nr, target, eot);
1054 /* note - thread killed in subroutine */
1056 brw_land_fwd_jump(p, jmp);
1058 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1060 fire_fb_write(c, 1, nr-1, target, eot);
1066 * Move a GPR to scratch memory.
/* emit_spill: writes reg to scratch memory.  The register is first
 * copied into message register 2; a write whose source is r0 (UW, 16
 * wide) follows.
 * NOTE(review): the slot parameter, the write call head and the scratch
 * offset computation are on lines not visible in this view.
 */
1068 static void emit_spill( struct brw_wm_compile *c,
1072 struct brw_compile *p = &c->func;
1075 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1077 brw_MOV(p, brw_message_reg(2), reg);
1080 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1081 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1084 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1090 * Load a GPR from scratch memory.
/* emit_unspill: loads reg back from scratch memory.  Per the existing
 * comment, slot 0 denotes the undefined value; the visible MOV of 0.0
 * into reg is presumably that case.  Otherwise a read is issued with reg
 * (UW, 16 wide) as its destination.
 * NOTE(review): the slot parameter, the slot test and the read call head
 * are on lines not visible in this view - confirm.
 */
1092 static void emit_unspill( struct brw_wm_compile *c,
1096 struct brw_compile *p = &c->func;
1098 /* Slot 0 is the undef value.
1101 brw_MOV(p, reg, brw_imm_f(0));
1106 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1107 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1111 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1117 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1118 * Args with unspill_reg != 0 will be loaded from scratch memory.
/* get_argument_regs: fills regs[0..3] with the hardware registers of the
 * four channel references in arg[].  A reference with a non-zero
 * unspill_reg is first reloaded from its value's spill slot into that
 * GRF; regs[i] then takes arg[i]->hw_reg.  Otherwise regs[i] is set to
 * the null register.
 * NOTE(review): the test selecting the null-register case (presumably a
 * NULL arg[i]) and the reload call head are on lines not visible in this
 * view.
 */
1120 static void get_argument_regs( struct brw_wm_compile *c,
1121 struct brw_wm_ref *arg[],
1122 struct brw_reg *regs )
1126 for (i = 0; i < 4; i++) {
1128 if (arg[i]->unspill_reg)
1130 brw_vec8_grf(arg[i]->unspill_reg, 0),
1131 arg[i]->value->spill_slot);
1133 regs[i] = arg[i]->hw_reg;
1136 regs[i] = brw_null_reg();
1143 * For values that have a spill_slot!=0, write those regs to scratch memory.
/* spill_values: walks the first nr entries of values and calls
 * emit_spill for every entry whose spill_slot is non-zero, passing its
 * hw_reg and slot.
 */
1145 static void spill_values( struct brw_wm_compile *c,
1146 struct brw_wm_value *values,
1151 for (i = 0; i < nr; i++)
1152 if (values[i].spill_slot)
1153 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1157 /* Emit the fragment program instructions here.
/* brw_wm_emit: main code-generation loop for the fragment program.
 * Compressed mode is enabled and any payload values with a spill slot
 * (depth, creg, input interpolants) are spilled.  For each instruction in
 * c->instruction, the three source operand sets are resolved through
 * get_argument_regs, the four destination registers are gathered (null
 * register where absent), dst_flags is built from the writemask plus
 * SATURATE, and the opcode is dispatched to the matching emit_* helper.
 * Unsupported opcodes are reported with _mesa_printf.  After each
 * instruction, destinations that have a spill slot are written back to
 * scratch.  When INTEL_DEBUG has DEBUG_WM set, the generated native code
 * is disassembled to stderr.
 * NOTE(review): nearly all case labels and the conditions guarding the
 * dst[] / SATURATE setup are on lines not visible in this view.
 */
1159 void brw_wm_emit( struct brw_wm_compile *c )
1161 struct brw_compile *p = &c->func;
1164 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1166 /* Check if any of the payload regs need to be spilled:
1168 spill_values(c, c->payload.depth, 4);
1169 spill_values(c, c->creg, c->nr_creg);
1170 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1173 for (insn = 0; insn < c->nr_insns; insn++) {
1175 struct brw_wm_instruction *inst = &c->instruction[insn];
1176 struct brw_reg args[3][4], dst[4];
1177 GLuint i, dst_flags;
1179 /* Get argument regs:
1181 for (i = 0; i < 3; i++)
1182 get_argument_regs(c, inst->src[i], args[i]);
1186 for (i = 0; i < 4; i++)
1188 dst[i] = inst->dst[i]->hw_reg;
1190 dst[i] = brw_null_reg();
1194 dst_flags = inst->writemask;
1196 dst_flags |= SATURATE;
1198 switch (inst->opcode) {
1199 /* Generated instructions for calculating triangle interpolants:
1202 emit_pixel_xy(p, dst, dst_flags);
1206 emit_delta_xy(p, dst, dst_flags, args[0]);
1210 emit_wpos_xy(c, dst, dst_flags, args[0]);
1214 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1218 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1222 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1226 emit_cinterp(p, dst, dst_flags, args[0]);
1230 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1233 case WM_FRONTFACING:
1234 emit_frontfacing(p, dst, dst_flags);
1237 /* Straightforward arithmetic:
1240 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1244 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1248 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1252 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1256 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1260 emit_dph(p, dst, dst_flags, args[0], args[1]);
1264 emit_trunc(p, dst, dst_flags, args[0]);
1268 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1272 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1277 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1281 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1285 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1288 /* Higher math functions:
1291 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1295 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1299 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1303 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1307 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1311 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1315 /* There is an scs math function, but it would need some
1316 * fixup for 16-element execution.
1318 if (dst_flags & WRITEMASK_X)
1319 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1320 if (dst_flags & WRITEMASK_Y)
1321 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1325 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1331 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1335 emit_max(p, dst, dst_flags, args[0], args[1]);
1339 emit_min(p, dst, dst_flags, args[0], args[1]);
1343 emit_slt(p, dst, dst_flags, args[0], args[1]);
1347 emit_sle(p, dst, dst_flags, args[0], args[1]);
1350 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1353 emit_sge(p, dst, dst_flags, args[0], args[1]);
1356 emit_seq(p, dst, dst_flags, args[0], args[1]);
1359 emit_sne(p, dst, dst_flags, args[0], args[1]);
1363 emit_lit(p, dst, dst_flags, args[0]);
1366 /* Texturing operations:
1369 emit_tex(c, inst, dst, dst_flags, args[0]);
1373 emit_txb(c, inst, dst, dst_flags, args[0]);
1377 emit_kil(c, args[0]);
1381 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1382 inst->opcode, inst->opcode < MAX_OPCODE ?
1383 _mesa_opcode_string(inst->opcode) :
1387 for (i = 0; i < 4; i++)
1388 if (inst->dst[i] && inst->dst[i]->spill_slot)
1390 inst->dst[i]->hw_reg,
1391 inst->dst[i]->spill_slot);
1394 if (INTEL_DEBUG & DEBUG_WM) {
1397 _mesa_printf("wm-native:\n");
1398 for (i = 0; i < p->nr_insn; i++)
1399 brw_disasm(stderr, &p->store[i]);