Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4  develop this 3D driver.
5  
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13  
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17  
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keith@tungstengraphics.com>
30   */
31      
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "../glsl/ralloc.h"
38
39 /***********************************************************************
40  * Internal helper for constructing instructions
41  */
42
43 static void guess_execution_size(struct brw_compile *p,
44                                  struct brw_instruction *insn,
45                                  struct brw_reg reg)
46 {
47    if (reg.width == BRW_WIDTH_8 && p->compressed)
48       insn->header.execution_size = BRW_EXECUTE_16;
49    else
50       insn->header.execution_size = reg.width;  /* note - definitions are compatible */
51 }
52
53
54 /**
55  * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56  * registers, implicitly moving the operand to a message register.
57  *
58  * On Sandybridge, this is no longer the case.  This function performs the
59  * explicit move; it should be called before emitting a SEND instruction.
60  */
61 static void
62 gen6_resolve_implied_move(struct brw_compile *p,
63                           struct brw_reg *src,
64                           GLuint msg_reg_nr)
65 {
66    struct intel_context *intel = &p->brw->intel;
67    if (intel->gen < 6)
68       return;
69
70    if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71       brw_push_insn_state(p);
72       brw_set_mask_control(p, BRW_MASK_DISABLE);
73       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74       brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
75               retype(*src, BRW_REGISTER_TYPE_UD));
76       brw_pop_insn_state(p);
77    }
78    *src = brw_message_reg(msg_reg_nr);
79 }
80
81 static void
82 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83 {
84    struct intel_context *intel = &p->brw->intel;
85    if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86       reg->file = BRW_GENERAL_REGISTER_FILE;
87       reg->nr += 111;
88    }
89 }
90
91
92 static void brw_set_dest(struct brw_compile *p,
93                          struct brw_instruction *insn,
94                          struct brw_reg dest)
95 {
96    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
97        dest.file != BRW_MESSAGE_REGISTER_FILE)
98       assert(dest.nr < 128);
99
100    gen7_convert_mrf_to_grf(p, &dest);
101
102    insn->bits1.da1.dest_reg_file = dest.file;
103    insn->bits1.da1.dest_reg_type = dest.type;
104    insn->bits1.da1.dest_address_mode = dest.address_mode;
105
106    if (dest.address_mode == BRW_ADDRESS_DIRECT) {   
107       insn->bits1.da1.dest_reg_nr = dest.nr;
108
109       if (insn->header.access_mode == BRW_ALIGN_1) {
110          insn->bits1.da1.dest_subreg_nr = dest.subnr;
111          if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
112             dest.hstride = BRW_HORIZONTAL_STRIDE_1;
113          insn->bits1.da1.dest_horiz_stride = dest.hstride;
114       }
115       else {
116          insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
117          insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
118          /* even ignored in da16, still need to set as '01' */
119          insn->bits1.da16.dest_horiz_stride = 1;
120       }
121    }
122    else {
123       insn->bits1.ia1.dest_subreg_nr = dest.subnr;
124
125       /* These are different sizes in align1 vs align16:
126        */
127       if (insn->header.access_mode == BRW_ALIGN_1) {
128          insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
129          if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
130             dest.hstride = BRW_HORIZONTAL_STRIDE_1;
131          insn->bits1.ia1.dest_horiz_stride = dest.hstride;
132       }
133       else {
134          insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
135          /* even ignored in da16, still need to set as '01' */
136          insn->bits1.ia16.dest_horiz_stride = 1;
137       }
138    }
139
140    /* NEW: Set the execution size based on dest.width and
141     * insn->compression_control:
142     */
143    guess_execution_size(p, insn, dest);
144 }
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151    int hstride_for_reg[] = {0, 1, 2, 4};
152    int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153    int width_for_reg[] = {1, 2, 4, 8, 16};
154    int execsize_for_reg[] = {1, 2, 4, 8, 16};
155    int width, hstride, vstride, execsize;
156
157    if (reg.file == BRW_IMMEDIATE_VALUE) {
158       /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
159        * mean the destination has to be 128-bit aligned and the
160        * destination horiz stride has to be a word.
161        */
162       if (reg.type == BRW_REGISTER_TYPE_V) {
163          assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164                 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165       }
166
167       return;
168    }
169
170    if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171        reg.file == BRW_ARF_NULL)
172       return;
173
174    assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175    hstride = hstride_for_reg[reg.hstride];
176
177    if (reg.vstride == 0xf) {
178       vstride = -1;
179    } else {
180       assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181       vstride = vstride_for_reg[reg.vstride];
182    }
183
184    assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185    width = width_for_reg[reg.width];
186
187    assert(insn->header.execution_size >= 0 &&
188           insn->header.execution_size < Elements(execsize_for_reg));
189    execsize = execsize_for_reg[insn->header.execution_size];
190
191    /* Restrictions from 3.3.10: Register Region Restrictions. */
192    /* 3. */
193    assert(execsize >= width);
194
195    /* 4. */
196    if (execsize == width && hstride != 0) {
197       assert(vstride == -1 || vstride == width * hstride);
198    }
199
200    /* 5. */
201    if (execsize == width && hstride == 0) {
202       /* no restriction on vstride. */
203    }
204
205    /* 6. */
206    if (width == 1) {
207       assert(hstride == 0);
208    }
209
210    /* 7. */
211    if (execsize == 1 && width == 1) {
212       assert(hstride == 0);
213       assert(vstride == 0);
214    }
215
216    /* 8. */
217    if (vstride == 0 && hstride == 0) {
218       assert(width == 1);
219    }
220
221    /* 10. Check destination issues. */
222 }
223
224 static void brw_set_src0(struct brw_compile *p,
225                          struct brw_instruction *insn,
226                          struct brw_reg reg)
227 {
228    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229       assert(reg.nr < 128);
230
231    gen7_convert_mrf_to_grf(p, &reg);
232
233    validate_reg(insn, reg);
234
235    insn->bits1.da1.src0_reg_file = reg.file;
236    insn->bits1.da1.src0_reg_type = reg.type;
237    insn->bits2.da1.src0_abs = reg.abs;
238    insn->bits2.da1.src0_negate = reg.negate;
239    insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241    if (reg.file == BRW_IMMEDIATE_VALUE) {
242       insn->bits3.ud = reg.dw1.ud;
243    
244       /* Required to set some fields in src1 as well:
245        */
246       insn->bits1.da1.src1_reg_file = 0; /* arf */
247       insn->bits1.da1.src1_reg_type = reg.type;
248    }
249    else 
250    {
251       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252          if (insn->header.access_mode == BRW_ALIGN_1) {
253             insn->bits2.da1.src0_subreg_nr = reg.subnr;
254             insn->bits2.da1.src0_reg_nr = reg.nr;
255          }
256          else {
257             insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258             insn->bits2.da16.src0_reg_nr = reg.nr;
259          }
260       }
261       else {
262          insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264          if (insn->header.access_mode == BRW_ALIGN_1) {
265             insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
266          }
267          else {
268             insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269          }
270       }
271
272       if (insn->header.access_mode == BRW_ALIGN_1) {
273          if (reg.width == BRW_WIDTH_1 && 
274              insn->header.execution_size == BRW_EXECUTE_1) {
275             insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276             insn->bits2.da1.src0_width = BRW_WIDTH_1;
277             insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278          }
279          else {
280             insn->bits2.da1.src0_horiz_stride = reg.hstride;
281             insn->bits2.da1.src0_width = reg.width;
282             insn->bits2.da1.src0_vert_stride = reg.vstride;
283          }
284       }
285       else {
286          insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287          insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288          insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289          insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291          /* This is an oddity of the fact we're using the same
292           * descriptions for registers in align_16 as align_1:
293           */
294          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295             insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296          else
297             insn->bits2.da16.src0_vert_stride = reg.vstride;
298       }
299    }
300 }
301
302
303 void brw_set_src1(struct brw_compile *p,
304                   struct brw_instruction *insn,
305                   struct brw_reg reg)
306 {
307    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
308
309    assert(reg.nr < 128);
310
311    gen7_convert_mrf_to_grf(p, &reg);
312
313    validate_reg(insn, reg);
314
315    insn->bits1.da1.src1_reg_file = reg.file;
316    insn->bits1.da1.src1_reg_type = reg.type;
317    insn->bits3.da1.src1_abs = reg.abs;
318    insn->bits3.da1.src1_negate = reg.negate;
319
320    /* Only src1 can be immediate in two-argument instructions.
321     */
322    assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
323
324    if (reg.file == BRW_IMMEDIATE_VALUE) {
325       insn->bits3.ud = reg.dw1.ud;
326    }
327    else {
328       /* This is a hardware restriction, which may or may not be lifted
329        * in the future:
330        */
331       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
332       /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
333
334       if (insn->header.access_mode == BRW_ALIGN_1) {
335          insn->bits3.da1.src1_subreg_nr = reg.subnr;
336          insn->bits3.da1.src1_reg_nr = reg.nr;
337       }
338       else {
339          insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
340          insn->bits3.da16.src1_reg_nr = reg.nr;
341       }
342
343       if (insn->header.access_mode == BRW_ALIGN_1) {
344          if (reg.width == BRW_WIDTH_1 && 
345              insn->header.execution_size == BRW_EXECUTE_1) {
346             insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
347             insn->bits3.da1.src1_width = BRW_WIDTH_1;
348             insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
349          }
350          else {
351             insn->bits3.da1.src1_horiz_stride = reg.hstride;
352             insn->bits3.da1.src1_width = reg.width;
353             insn->bits3.da1.src1_vert_stride = reg.vstride;
354          }
355       }
356       else {
357          insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
358          insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
359          insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
360          insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
361
362          /* This is an oddity of the fact we're using the same
363           * descriptions for registers in align_16 as align_1:
364           */
365          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
366             insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
367          else
368             insn->bits3.da16.src1_vert_stride = reg.vstride;
369       }
370    }
371 }
372
373
374
375 static void brw_set_math_message( struct brw_compile *p,
376                                   struct brw_instruction *insn,
377                                   GLuint msg_length,
378                                   GLuint response_length,
379                                   GLuint function,
380                                   GLuint integer_type,
381                                   GLboolean low_precision,
382                                   GLboolean saturate,
383                                   GLuint dataType )
384 {
385    struct brw_context *brw = p->brw;
386    struct intel_context *intel = &brw->intel;
387    brw_set_src1(p, insn, brw_imm_d(0));
388
389    if (intel->gen == 5) {
390       insn->bits3.math_gen5.function = function;
391       insn->bits3.math_gen5.int_type = integer_type;
392       insn->bits3.math_gen5.precision = low_precision;
393       insn->bits3.math_gen5.saturate = saturate;
394       insn->bits3.math_gen5.data_type = dataType;
395       insn->bits3.math_gen5.snapshot = 0;
396       insn->bits3.math_gen5.header_present = 0;
397       insn->bits3.math_gen5.response_length = response_length;
398       insn->bits3.math_gen5.msg_length = msg_length;
399       insn->bits3.math_gen5.end_of_thread = 0;
400       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
401       insn->bits2.send_gen5.end_of_thread = 0;
402    } else {
403       insn->bits3.math.function = function;
404       insn->bits3.math.int_type = integer_type;
405       insn->bits3.math.precision = low_precision;
406       insn->bits3.math.saturate = saturate;
407       insn->bits3.math.data_type = dataType;
408       insn->bits3.math.response_length = response_length;
409       insn->bits3.math.msg_length = msg_length;
410       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
411       insn->bits3.math.end_of_thread = 0;
412    }
413 }
414
415
416 static void brw_set_ff_sync_message(struct brw_compile *p,
417                                     struct brw_instruction *insn,
418                                     GLboolean allocate,
419                                     GLuint response_length,
420                                     GLboolean end_of_thread)
421 {
422    struct brw_context *brw = p->brw;
423    struct intel_context *intel = &brw->intel;
424    brw_set_src1(p, insn, brw_imm_d(0));
425
426    insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
427    insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
428    insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
429    insn->bits3.urb_gen5.allocate = allocate;
430    insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
431    insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
432    insn->bits3.urb_gen5.header_present = 1;
433    insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
434    insn->bits3.urb_gen5.msg_length = 1;
435    insn->bits3.urb_gen5.end_of_thread = end_of_thread;
436    if (intel->gen >= 6) {
437       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
438    } else {
439       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
440       insn->bits2.send_gen5.end_of_thread = end_of_thread;
441    }
442 }
443
444 static void brw_set_urb_message( struct brw_compile *p,
445                                  struct brw_instruction *insn,
446                                  GLboolean allocate,
447                                  GLboolean used,
448                                  GLuint msg_length,
449                                  GLuint response_length,
450                                  GLboolean end_of_thread,
451                                  GLboolean complete,
452                                  GLuint offset,
453                                  GLuint swizzle_control )
454 {
455    struct brw_context *brw = p->brw;
456    struct intel_context *intel = &brw->intel;
457    brw_set_src1(p, insn, brw_imm_d(0));
458
459    if (intel->gen == 7) {
460       insn->bits3.urb_gen7.opcode = 0;  /* URB_WRITE_HWORD */
461       insn->bits3.urb_gen7.offset = offset;
462       assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
463       insn->bits3.urb_gen7.swizzle_control = swizzle_control;
464       /* per_slot_offset = 0 makes it ignore offsets in message header */
465       insn->bits3.urb_gen7.per_slot_offset = 0;
466       insn->bits3.urb_gen7.complete = complete;
467       insn->bits3.urb_gen7.header_present = 1;
468       insn->bits3.urb_gen7.response_length = response_length;
469       insn->bits3.urb_gen7.msg_length = msg_length;
470       insn->bits3.urb_gen7.end_of_thread = end_of_thread;
471       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
472    } else if (intel->gen >= 5) {
473       insn->bits3.urb_gen5.opcode = 0;  /* URB_WRITE */
474       insn->bits3.urb_gen5.offset = offset;
475       insn->bits3.urb_gen5.swizzle_control = swizzle_control;
476       insn->bits3.urb_gen5.allocate = allocate;
477       insn->bits3.urb_gen5.used = used; /* ? */
478       insn->bits3.urb_gen5.complete = complete;
479       insn->bits3.urb_gen5.header_present = 1;
480       insn->bits3.urb_gen5.response_length = response_length;
481       insn->bits3.urb_gen5.msg_length = msg_length;
482       insn->bits3.urb_gen5.end_of_thread = end_of_thread;
483       if (intel->gen >= 6) {
484          /* For SNB, the SFID bits moved to the condmod bits, and
485           * EOT stayed in bits3 above.  Does the EOT bit setting
486           * below on Ironlake even do anything?
487           */
488          insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
489       } else {
490          insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
491          insn->bits2.send_gen5.end_of_thread = end_of_thread;
492       }
493    } else {
494       insn->bits3.urb.opcode = 0;       /* ? */
495       insn->bits3.urb.offset = offset;
496       insn->bits3.urb.swizzle_control = swizzle_control;
497       insn->bits3.urb.allocate = allocate;
498       insn->bits3.urb.used = used;      /* ? */
499       insn->bits3.urb.complete = complete;
500       insn->bits3.urb.response_length = response_length;
501       insn->bits3.urb.msg_length = msg_length;
502       insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
503       insn->bits3.urb.end_of_thread = end_of_thread;
504    }
505 }
506
507 static void brw_set_dp_write_message( struct brw_compile *p,
508                                       struct brw_instruction *insn,
509                                       GLuint binding_table_index,
510                                       GLuint msg_control,
511                                       GLuint msg_type,
512                                       GLuint msg_length,
513                                       GLboolean header_present,
514                                       GLuint pixel_scoreboard_clear,
515                                       GLuint response_length,
516                                       GLuint end_of_thread,
517                                       GLuint send_commit_msg)
518 {
519    struct brw_context *brw = p->brw;
520    struct intel_context *intel = &brw->intel;
521    brw_set_src1(p, insn, brw_imm_ud(0));
522
523    if (intel->gen >= 7) {
524       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
525       insn->bits3.gen7_dp.msg_control = msg_control;
526       insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
527       insn->bits3.gen7_dp.msg_type = msg_type;
528       insn->bits3.gen7_dp.header_present = header_present;
529       insn->bits3.gen7_dp.response_length = response_length;
530       insn->bits3.gen7_dp.msg_length = msg_length;
531       insn->bits3.gen7_dp.end_of_thread = end_of_thread;
532
533       /* We always use the render cache for write messages */
534       insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
535    } else if (intel->gen == 6) {
536       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
537       insn->bits3.gen6_dp.msg_control = msg_control;
538       insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
539       insn->bits3.gen6_dp.msg_type = msg_type;
540       insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
541       insn->bits3.gen6_dp.header_present = header_present;
542       insn->bits3.gen6_dp.response_length = response_length;
543       insn->bits3.gen6_dp.msg_length = msg_length;
544       insn->bits3.gen6_dp.end_of_thread = end_of_thread;
545
546       /* We always use the render cache for write messages */
547       insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
548    } else if (intel->gen == 5) {
549       insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
550       insn->bits3.dp_write_gen5.msg_control = msg_control;
551       insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
552       insn->bits3.dp_write_gen5.msg_type = msg_type;
553       insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
554       insn->bits3.dp_write_gen5.header_present = header_present;
555       insn->bits3.dp_write_gen5.response_length = response_length;
556       insn->bits3.dp_write_gen5.msg_length = msg_length;
557       insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
558       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
559       insn->bits2.send_gen5.end_of_thread = end_of_thread;
560    } else {
561       insn->bits3.dp_write.binding_table_index = binding_table_index;
562       insn->bits3.dp_write.msg_control = msg_control;
563       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
564       insn->bits3.dp_write.msg_type = msg_type;
565       insn->bits3.dp_write.send_commit_msg = send_commit_msg;
566       insn->bits3.dp_write.response_length = response_length;
567       insn->bits3.dp_write.msg_length = msg_length;
568       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
569       insn->bits3.dp_write.end_of_thread = end_of_thread;
570    }
571 }
572
573 static void
574 brw_set_dp_read_message(struct brw_compile *p,
575                         struct brw_instruction *insn,
576                         GLuint binding_table_index,
577                         GLuint msg_control,
578                         GLuint msg_type,
579                         GLuint target_cache,
580                         GLuint msg_length,
581                         GLuint response_length)
582 {
583    struct brw_context *brw = p->brw;
584    struct intel_context *intel = &brw->intel;
585    brw_set_src1(p, insn, brw_imm_d(0));
586
587    if (intel->gen >= 7) {
588       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
589       insn->bits3.gen7_dp.msg_control = msg_control;
590       insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
591       insn->bits3.gen7_dp.msg_type = msg_type;
592       insn->bits3.gen7_dp.header_present = 1;
593       insn->bits3.gen7_dp.response_length = response_length;
594       insn->bits3.gen7_dp.msg_length = msg_length;
595       insn->bits3.gen7_dp.end_of_thread = 0;
596       insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE;
597    } else if (intel->gen == 6) {
598       uint32_t target_function;
599
600       if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
601          target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
602       else
603          target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
604
605       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
606       insn->bits3.gen6_dp.msg_control = msg_control;
607       insn->bits3.gen6_dp.pixel_scoreboard_clear = 0;
608       insn->bits3.gen6_dp.msg_type = msg_type;
609       insn->bits3.gen6_dp.send_commit_msg = 0;
610       insn->bits3.gen6_dp.header_present = 1;
611       insn->bits3.gen6_dp.response_length = response_length;
612       insn->bits3.gen6_dp.msg_length = msg_length;
613       insn->bits3.gen6_dp.end_of_thread = 0;
614       insn->header.destreg__conditionalmod = target_function;
615    } else if (intel->gen == 5) {
616       insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
617       insn->bits3.dp_read_gen5.msg_control = msg_control;
618       insn->bits3.dp_read_gen5.msg_type = msg_type;
619       insn->bits3.dp_read_gen5.target_cache = target_cache;
620       insn->bits3.dp_read_gen5.header_present = 1;
621       insn->bits3.dp_read_gen5.response_length = response_length;
622       insn->bits3.dp_read_gen5.msg_length = msg_length;
623       insn->bits3.dp_read_gen5.pad1 = 0;
624       insn->bits3.dp_read_gen5.end_of_thread = 0;
625       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
626       insn->bits2.send_gen5.end_of_thread = 0;
627    } else if (intel->is_g4x) {
628       insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
629       insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
630       insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
631       insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
632       insn->bits3.dp_read_g4x.response_length = response_length;  /*16:19*/
633       insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
634       insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
635       insn->bits3.dp_read_g4x.pad1 = 0;
636       insn->bits3.dp_read_g4x.end_of_thread = 0;
637    } else {
638       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
639       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
640       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
641       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
642       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
643       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
644       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
645       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
646       insn->bits3.dp_read.end_of_thread = 0;  /*31*/
647    }
648 }
649
650 static void brw_set_sampler_message(struct brw_compile *p,
651                                     struct brw_instruction *insn,
652                                     GLuint binding_table_index,
653                                     GLuint sampler,
654                                     GLuint msg_type,
655                                     GLuint response_length,
656                                     GLuint msg_length,
657                                     GLboolean eot,
658                                     GLuint header_present,
659                                     GLuint simd_mode)
660 {
661    struct brw_context *brw = p->brw;
662    struct intel_context *intel = &brw->intel;
663    assert(eot == 0);
664    brw_set_src1(p, insn, brw_imm_d(0));
665
666    if (intel->gen >= 7) {
667       insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
668       insn->bits3.sampler_gen7.sampler = sampler;
669       insn->bits3.sampler_gen7.msg_type = msg_type;
670       insn->bits3.sampler_gen7.simd_mode = simd_mode;
671       insn->bits3.sampler_gen7.header_present = header_present;
672       insn->bits3.sampler_gen7.response_length = response_length;
673       insn->bits3.sampler_gen7.msg_length = msg_length;
674       insn->bits3.sampler_gen7.end_of_thread = eot;
675       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
676    } else if (intel->gen >= 5) {
677       insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
678       insn->bits3.sampler_gen5.sampler = sampler;
679       insn->bits3.sampler_gen5.msg_type = msg_type;
680       insn->bits3.sampler_gen5.simd_mode = simd_mode;
681       insn->bits3.sampler_gen5.header_present = header_present;
682       insn->bits3.sampler_gen5.response_length = response_length;
683       insn->bits3.sampler_gen5.msg_length = msg_length;
684       insn->bits3.sampler_gen5.end_of_thread = eot;
685       if (intel->gen >= 6)
686           insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
687       else {
688           insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
689           insn->bits2.send_gen5.end_of_thread = eot;
690       }
691    } else if (intel->is_g4x) {
692       insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
693       insn->bits3.sampler_g4x.sampler = sampler;
694       insn->bits3.sampler_g4x.msg_type = msg_type;
695       insn->bits3.sampler_g4x.response_length = response_length;
696       insn->bits3.sampler_g4x.msg_length = msg_length;
697       insn->bits3.sampler_g4x.end_of_thread = eot;
698       insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
699    } else {
700       insn->bits3.sampler.binding_table_index = binding_table_index;
701       insn->bits3.sampler.sampler = sampler;
702       insn->bits3.sampler.msg_type = msg_type;
703       insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
704       insn->bits3.sampler.response_length = response_length;
705       insn->bits3.sampler.msg_length = msg_length;
706       insn->bits3.sampler.end_of_thread = eot;
707       insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
708    }
709 }
710
711
712
713 static struct brw_instruction *next_insn( struct brw_compile *p, 
714                                           GLuint opcode )
715 {
716    struct brw_instruction *insn;
717
718    assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
719
720    insn = &p->store[p->nr_insn++];
721    memcpy(insn, p->current, sizeof(*insn));
722
723    /* Reset this one-shot flag: 
724     */
725
726    if (p->current->header.destreg__conditionalmod) {
727       p->current->header.destreg__conditionalmod = 0;
728       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
729    }
730
731    insn->header.opcode = opcode;
732    return insn;
733 }
734
735
736 static struct brw_instruction *brw_alu1( struct brw_compile *p,
737                                          GLuint opcode,
738                                          struct brw_reg dest,
739                                          struct brw_reg src )
740 {
741    struct brw_instruction *insn = next_insn(p, opcode);
742    brw_set_dest(p, insn, dest);
743    brw_set_src0(p, insn, src);
744    return insn;
745 }
746
747 static struct brw_instruction *brw_alu2(struct brw_compile *p,
748                                         GLuint opcode,
749                                         struct brw_reg dest,
750                                         struct brw_reg src0,
751                                         struct brw_reg src1 )
752 {
753    struct brw_instruction *insn = next_insn(p, opcode);   
754    brw_set_dest(p, insn, dest);
755    brw_set_src0(p, insn, src0);
756    brw_set_src1(p, insn, src1);
757    return insn;
758 }
759
760
761 /***********************************************************************
762  * Convenience routines.
763  */
/* Define the public one-source emitter brw_<OP>(): a thin wrapper that
 * forwards its operands to brw_alu1() together with BRW_OPCODE_<OP>.
 */
#define ALU1(OP)                                                  \
struct brw_instruction *                                          \
brw_##OP(struct brw_compile *p,                                   \
         struct brw_reg dest, struct brw_reg src0)                \
{                                                                 \
   struct brw_instruction *emitted =                              \
      brw_alu1(p, BRW_OPCODE_##OP, dest, src0);                   \
   return emitted;                                                \
}
771
/* Define the public two-source emitter brw_<OP>(): a thin wrapper that
 * forwards its operands to brw_alu2() together with BRW_OPCODE_<OP>.
 */
#define ALU2(OP)                                                  \
struct brw_instruction *                                          \
brw_##OP(struct brw_compile *p,                                   \
         struct brw_reg dest,                                     \
         struct brw_reg src0, struct brw_reg src1)                \
{                                                                 \
   struct brw_instruction *emitted =                              \
      brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);             \
   return emitted;                                                \
}
780
/* Define brw_<OP>() for a rounding opcode, including the fix-up that older
 * hardware needs.
 *
 * Rounding operations other than RNDD take two instructions before gen6:
 * the first stores a rounded value (possibly rounded the wrong way) in the
 * dest register and also sets a per-channel "increment bit" in the flag
 * register.  A predicated add of 1.0 then fixes dest up to the desired
 * result.
 *
 * Sandybridge and later appear to round correctly without the ADD, so on
 * gen6+ only the rounding instruction itself is emitted.
 */
#define ROUND(OP)                                                             \
void                                                                          \
brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src)      \
{                                                                             \
   struct brw_instruction *round_insn = next_insn(p, BRW_OPCODE_##OP);        \
   struct brw_instruction *fixup;                                             \
                                                                              \
   brw_set_dest(p, round_insn, dest);                                         \
   brw_set_src0(p, round_insn, src);                                          \
                                                                              \
   if (p->brw->intel.gen >= 6)                                                \
      return;                                                                 \
                                                                              \
   /* turn on round-increments, then add 1.0 where the flag says so */        \
   round_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_R;            \
   fixup = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                           \
   fixup->header.predicate_control = BRW_PREDICATE_NORMAL;                    \
}
805
806
807 ALU1(MOV)
808 ALU2(SEL)
809 ALU1(NOT)
810 ALU2(AND)
811 ALU2(OR)
812 ALU2(XOR)
813 ALU2(SHR)
814 ALU2(SHL)
815 ALU2(RSR)
816 ALU2(RSL)
817 ALU2(ASR)
818 ALU1(FRC)
819 ALU1(RNDD)
820 ALU2(MAC)
821 ALU2(MACH)
822 ALU1(LZD)
823 ALU2(DP4)
824 ALU2(DPH)
825 ALU2(DP3)
826 ALU2(DP2)
827 ALU2(LINE)
828 ALU2(PLN)
829
830
831 ROUND(RNDZ)
832 ROUND(RNDE)
833
834
835 struct brw_instruction *brw_ADD(struct brw_compile *p,
836                                 struct brw_reg dest,
837                                 struct brw_reg src0,
838                                 struct brw_reg src1)
839 {
840    /* 6.2.2: add */
841    if (src0.type == BRW_REGISTER_TYPE_F ||
842        (src0.file == BRW_IMMEDIATE_VALUE &&
843         src0.type == BRW_REGISTER_TYPE_VF)) {
844       assert(src1.type != BRW_REGISTER_TYPE_UD);
845       assert(src1.type != BRW_REGISTER_TYPE_D);
846    }
847
848    if (src1.type == BRW_REGISTER_TYPE_F ||
849        (src1.file == BRW_IMMEDIATE_VALUE &&
850         src1.type == BRW_REGISTER_TYPE_VF)) {
851       assert(src0.type != BRW_REGISTER_TYPE_UD);
852       assert(src0.type != BRW_REGISTER_TYPE_D);
853    }
854
855    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
856 }
857
858 struct brw_instruction *brw_MUL(struct brw_compile *p,
859                                 struct brw_reg dest,
860                                 struct brw_reg src0,
861                                 struct brw_reg src1)
862 {
863    /* 6.32.38: mul */
864    if (src0.type == BRW_REGISTER_TYPE_D ||
865        src0.type == BRW_REGISTER_TYPE_UD ||
866        src1.type == BRW_REGISTER_TYPE_D ||
867        src1.type == BRW_REGISTER_TYPE_UD) {
868       assert(dest.type != BRW_REGISTER_TYPE_F);
869    }
870
871    if (src0.type == BRW_REGISTER_TYPE_F ||
872        (src0.file == BRW_IMMEDIATE_VALUE &&
873         src0.type == BRW_REGISTER_TYPE_VF)) {
874       assert(src1.type != BRW_REGISTER_TYPE_UD);
875       assert(src1.type != BRW_REGISTER_TYPE_D);
876    }
877
878    if (src1.type == BRW_REGISTER_TYPE_F ||
879        (src1.file == BRW_IMMEDIATE_VALUE &&
880         src1.type == BRW_REGISTER_TYPE_VF)) {
881       assert(src0.type != BRW_REGISTER_TYPE_UD);
882       assert(src0.type != BRW_REGISTER_TYPE_D);
883    }
884
885    assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
886           src0.nr != BRW_ARF_ACCUMULATOR);
887    assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
888           src1.nr != BRW_ARF_ACCUMULATOR);
889
890    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
891 }
892
893
894 void brw_NOP(struct brw_compile *p)
895 {
896    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);   
897    brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
898    brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
899    brw_set_src1(p, insn, brw_imm_ud(0x0));
900 }
901
902
903
904
905
906 /***********************************************************************
907  * Comparisons, if/else/endif
908  */
909
910 struct brw_instruction *brw_JMPI(struct brw_compile *p, 
911                                  struct brw_reg dest,
912                                  struct brw_reg src0,
913                                  struct brw_reg src1)
914 {
915    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
916
917    insn->header.execution_size = 1;
918    insn->header.compression_control = BRW_COMPRESSION_NONE;
919    insn->header.mask_control = BRW_MASK_DISABLE;
920
921    p->current->header.predicate_control = BRW_PREDICATE_NONE;
922
923    return insn;
924 }
925
926 static void
927 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
928 {
929    p->if_stack[p->if_stack_depth] = inst;
930
931    p->if_stack_depth++;
932    if (p->if_stack_array_size <= p->if_stack_depth) {
933       p->if_stack_array_size *= 2;
934       p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
935                              p->if_stack_array_size);
936    }
937 }
938
939 /* EU takes the value from the flag register and pushes it onto some
940  * sort of a stack (presumably merging with any flag value already on
941  * the stack).  Within an if block, the flags at the top of the stack
942  * control execution on each channel of the unit, eg. on each of the
943  * 16 pixel values in our wm programs.
944  *
945  * When the matching 'else' instruction is reached (presumably by
946  * countdown of the instruction count patched in by our ELSE/ENDIF
947  * functions), the relevent flags are inverted.
948  *
949  * When the matching 'endif' instruction is reached, the flags are
950  * popped off.  If the stack is now empty, normal execution resumes.
951  */
952 struct brw_instruction *
953 brw_IF(struct brw_compile *p, GLuint execute_size)
954 {
955    struct intel_context *intel = &p->brw->intel;
956    struct brw_instruction *insn;
957
958    insn = next_insn(p, BRW_OPCODE_IF);
959
960    /* Override the defaults for this instruction:
961     */
962    if (intel->gen < 6) {
963       brw_set_dest(p, insn, brw_ip_reg());
964       brw_set_src0(p, insn, brw_ip_reg());
965       brw_set_src1(p, insn, brw_imm_d(0x0));
966    } else if (intel->gen == 6) {
967       brw_set_dest(p, insn, brw_imm_w(0));
968       insn->bits1.branch_gen6.jump_count = 0;
969       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
970       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
971    } else {
972       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
973       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
974       brw_set_src1(p, insn, brw_imm_ud(0));
975       insn->bits3.break_cont.jip = 0;
976       insn->bits3.break_cont.uip = 0;
977    }
978
979    insn->header.execution_size = execute_size;
980    insn->header.compression_control = BRW_COMPRESSION_NONE;
981    insn->header.predicate_control = BRW_PREDICATE_NORMAL;
982    insn->header.mask_control = BRW_MASK_ENABLE;
983    if (!p->single_program_flow)
984       insn->header.thread_control = BRW_THREAD_SWITCH;
985
986    p->current->header.predicate_control = BRW_PREDICATE_NONE;
987
988    push_if_stack(p, insn);
989    return insn;
990 }
991
992 /* This function is only used for gen6-style IF instructions with an
993  * embedded comparison (conditional modifier).  It is not used on gen7.
994  */
995 struct brw_instruction *
996 gen6_IF(struct brw_compile *p, uint32_t conditional,
997         struct brw_reg src0, struct brw_reg src1)
998 {
999    struct brw_instruction *insn;
1000
1001    insn = next_insn(p, BRW_OPCODE_IF);
1002
1003    brw_set_dest(p, insn, brw_imm_w(0));
1004    if (p->compressed) {
1005       insn->header.execution_size = BRW_EXECUTE_16;
1006    } else {
1007       insn->header.execution_size = BRW_EXECUTE_8;
1008    }
1009    insn->bits1.branch_gen6.jump_count = 0;
1010    brw_set_src0(p, insn, src0);
1011    brw_set_src1(p, insn, src1);
1012
1013    assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1014    assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1015    insn->header.destreg__conditionalmod = conditional;
1016
1017    if (!p->single_program_flow)
1018       insn->header.thread_control = BRW_THREAD_SWITCH;
1019
1020    push_if_stack(p, insn);
1021    return insn;
1022 }
1023
1024 /**
1025  * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1026  */
1027 static void
1028 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1029                        struct brw_instruction *if_inst,
1030                        struct brw_instruction *else_inst)
1031 {
1032    /* The next instruction (where the ENDIF would be, if it existed) */
1033    struct brw_instruction *next_inst = &p->store[p->nr_insn];
1034
1035    assert(p->single_program_flow);
1036    assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1037    assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1038    assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1039
1040    /* Convert IF to an ADD instruction that moves the instruction pointer
1041     * to the first instruction of the ELSE block.  If there is no ELSE
1042     * block, point to where ENDIF would be.  Reverse the predicate.
1043     *
1044     * There's no need to execute an ENDIF since we don't need to do any
1045     * stack operations, and if we're currently executing, we just want to
1046     * continue normally.
1047     */
1048    if_inst->header.opcode = BRW_OPCODE_ADD;
1049    if_inst->header.predicate_inverse = 1;
1050
1051    if (else_inst != NULL) {
1052       /* Convert ELSE to an ADD instruction that points where the ENDIF
1053        * would be.
1054        */
1055       else_inst->header.opcode = BRW_OPCODE_ADD;
1056
1057       if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1058       else_inst->bits3.ud = (next_inst - else_inst) * 16;
1059    } else {
1060       if_inst->bits3.ud = (next_inst - if_inst) * 16;
1061    }
1062 }
1063
1064 /**
1065  * Patch IF and ELSE instructions with appropriate jump targets.
1066  */
1067 static void
1068 patch_IF_ELSE(struct brw_compile *p,
1069               struct brw_instruction *if_inst,
1070               struct brw_instruction *else_inst,
1071               struct brw_instruction *endif_inst)
1072 {
1073    struct intel_context *intel = &p->brw->intel;
1074
1075    assert(!p->single_program_flow);
1076    assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1077    assert(endif_inst != NULL);
1078    assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1079
1080    unsigned br = 1;
1081    /* Jump count is for 64bit data chunk each, so one 128bit instruction
1082     * requires 2 chunks.
1083     */
1084    if (intel->gen >= 5)
1085       br = 2;
1086
1087    assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1088    endif_inst->header.execution_size = if_inst->header.execution_size;
1089
1090    if (else_inst == NULL) {
1091       /* Patch IF -> ENDIF */
1092       if (intel->gen < 6) {
1093          /* Turn it into an IFF, which means no mask stack operations for
1094           * all-false and jumping past the ENDIF.
1095           */
1096          if_inst->header.opcode = BRW_OPCODE_IFF;
1097          if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1098          if_inst->bits3.if_else.pop_count = 0;
1099          if_inst->bits3.if_else.pad0 = 0;
1100       } else if (intel->gen == 6) {
1101          /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1102          if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1103       } else {
1104          if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1105          if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1106       }
1107    } else {
1108       else_inst->header.execution_size = if_inst->header.execution_size;
1109
1110       /* Patch IF -> ELSE */
1111       if (intel->gen < 6) {
1112          if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1113          if_inst->bits3.if_else.pop_count = 0;
1114          if_inst->bits3.if_else.pad0 = 0;
1115       } else if (intel->gen == 6) {
1116          if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1117       }
1118
1119       /* Patch ELSE -> ENDIF */
1120       if (intel->gen < 6) {
1121          /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1122           * matching ENDIF.
1123           */
1124          else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1125          else_inst->bits3.if_else.pop_count = 1;
1126          else_inst->bits3.if_else.pad0 = 0;
1127       } else if (intel->gen == 6) {
1128          /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1129          else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1130       } else {
1131          /* The IF instruction's JIP should point just past the ELSE */
1132          if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1133          /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1134          if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1135          else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1136       }
1137    }
1138 }
1139
1140 void
1141 brw_ELSE(struct brw_compile *p)
1142 {
1143    struct intel_context *intel = &p->brw->intel;
1144    struct brw_instruction *insn;
1145
1146    insn = next_insn(p, BRW_OPCODE_ELSE);
1147
1148    if (intel->gen < 6) {
1149       brw_set_dest(p, insn, brw_ip_reg());
1150       brw_set_src0(p, insn, brw_ip_reg());
1151       brw_set_src1(p, insn, brw_imm_d(0x0));
1152    } else if (intel->gen == 6) {
1153       brw_set_dest(p, insn, brw_imm_w(0));
1154       insn->bits1.branch_gen6.jump_count = 0;
1155       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1156       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1157    } else {
1158       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1159       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1160       brw_set_src1(p, insn, brw_imm_ud(0));
1161       insn->bits3.break_cont.jip = 0;
1162       insn->bits3.break_cont.uip = 0;
1163    }
1164
1165    insn->header.compression_control = BRW_COMPRESSION_NONE;
1166    insn->header.mask_control = BRW_MASK_ENABLE;
1167    if (!p->single_program_flow)
1168       insn->header.thread_control = BRW_THREAD_SWITCH;
1169
1170    push_if_stack(p, insn);
1171 }
1172
1173 void
1174 brw_ENDIF(struct brw_compile *p)
1175 {
1176    struct intel_context *intel = &p->brw->intel;
1177    struct brw_instruction *insn;
1178    struct brw_instruction *else_inst = NULL;
1179    struct brw_instruction *if_inst = NULL;
1180
1181    /* Pop the IF and (optional) ELSE instructions from the stack */
1182    p->if_stack_depth--;
1183    if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
1184       else_inst = p->if_stack[p->if_stack_depth];
1185       p->if_stack_depth--;
1186    }
1187    if_inst = p->if_stack[p->if_stack_depth];
1188
1189    if (p->single_program_flow) {
1190       /* ENDIF is useless; don't bother emitting it. */
1191       convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
1192       return;
1193    }
1194
1195    insn = next_insn(p, BRW_OPCODE_ENDIF);
1196
1197    if (intel->gen < 6) {
1198       brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1199       brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1200       brw_set_src1(p, insn, brw_imm_d(0x0));
1201    } else if (intel->gen == 6) {
1202       brw_set_dest(p, insn, brw_imm_w(0));
1203       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1204       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1205    } else {
1206       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1207       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1208       brw_set_src1(p, insn, brw_imm_ud(0));
1209    }
1210
1211    insn->header.compression_control = BRW_COMPRESSION_NONE;
1212    insn->header.mask_control = BRW_MASK_ENABLE;
1213    insn->header.thread_control = BRW_THREAD_SWITCH;
1214
1215    /* Also pop item off the stack in the endif instruction: */
1216    if (intel->gen < 6) {
1217       insn->bits3.if_else.jump_count = 0;
1218       insn->bits3.if_else.pop_count = 1;
1219       insn->bits3.if_else.pad0 = 0;
1220    } else if (intel->gen == 6) {
1221       insn->bits1.branch_gen6.jump_count = 2;
1222    } else {
1223       insn->bits3.break_cont.jip = 2;
1224    }
1225    patch_IF_ELSE(p, if_inst, else_inst, insn);
1226 }
1227
1228 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1229 {
1230    struct intel_context *intel = &p->brw->intel;
1231    struct brw_instruction *insn;
1232
1233    insn = next_insn(p, BRW_OPCODE_BREAK);
1234    if (intel->gen >= 6) {
1235       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1236       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1237       brw_set_src1(p, insn, brw_imm_d(0x0));
1238    } else {
1239       brw_set_dest(p, insn, brw_ip_reg());
1240       brw_set_src0(p, insn, brw_ip_reg());
1241       brw_set_src1(p, insn, brw_imm_d(0x0));
1242       insn->bits3.if_else.pad0 = 0;
1243       insn->bits3.if_else.pop_count = pop_count;
1244    }
1245    insn->header.compression_control = BRW_COMPRESSION_NONE;
1246    insn->header.execution_size = BRW_EXECUTE_8;
1247
1248    return insn;
1249 }
1250
1251 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1252                                   struct brw_instruction *do_insn)
1253 {
1254    struct brw_instruction *insn;
1255
1256    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1257    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1258    brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1259    brw_set_dest(p, insn, brw_ip_reg());
1260    brw_set_src0(p, insn, brw_ip_reg());
1261    brw_set_src1(p, insn, brw_imm_d(0x0));
1262
1263    insn->header.compression_control = BRW_COMPRESSION_NONE;
1264    insn->header.execution_size = BRW_EXECUTE_8;
1265    return insn;
1266 }
1267
1268 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1269 {
1270    struct brw_instruction *insn;
1271    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1272    brw_set_dest(p, insn, brw_ip_reg());
1273    brw_set_src0(p, insn, brw_ip_reg());
1274    brw_set_src1(p, insn, brw_imm_d(0x0));
1275    insn->header.compression_control = BRW_COMPRESSION_NONE;
1276    insn->header.execution_size = BRW_EXECUTE_8;
1277    /* insn->header.mask_control = BRW_MASK_DISABLE; */
1278    insn->bits3.if_else.pad0 = 0;
1279    insn->bits3.if_else.pop_count = pop_count;
1280    return insn;
1281 }
1282
1283 /* DO/WHILE loop:
1284  *
1285  * The DO/WHILE is just an unterminated loop -- break or continue are
1286  * used for control within the loop.  We have a few ways they can be
1287  * done.
1288  *
1289  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1290  * jip and no DO instruction.
1291  *
1292  * For non-uniform control flow pre-gen6, there's a DO instruction to
1293  * push the mask, and a WHILE to jump back, and BREAK to get out and
1294  * pop the mask.
1295  *
1296  * For gen6, there's no more mask stack, so no need for DO.  WHILE
1297  * just points back to the first instruction of the loop.
1298  */
1299 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1300 {
1301    struct intel_context *intel = &p->brw->intel;
1302
1303    if (intel->gen >= 6 || p->single_program_flow) {
1304       return &p->store[p->nr_insn];
1305    } else {
1306       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1307
1308       /* Override the defaults for this instruction:
1309        */
1310       brw_set_dest(p, insn, brw_null_reg());
1311       brw_set_src0(p, insn, brw_null_reg());
1312       brw_set_src1(p, insn, brw_null_reg());
1313
1314       insn->header.compression_control = BRW_COMPRESSION_NONE;
1315       insn->header.execution_size = execute_size;
1316       insn->header.predicate_control = BRW_PREDICATE_NONE;
1317       /* insn->header.mask_control = BRW_MASK_ENABLE; */
1318       /* insn->header.mask_control = BRW_MASK_DISABLE; */
1319
1320       return insn;
1321    }
1322 }
1323
1324
1325
1326 struct brw_instruction *brw_WHILE(struct brw_compile *p, 
1327                                   struct brw_instruction *do_insn)
1328 {
1329    struct intel_context *intel = &p->brw->intel;
1330    struct brw_instruction *insn;
1331    GLuint br = 1;
1332
1333    if (intel->gen >= 5)
1334       br = 2;
1335
1336    if (intel->gen >= 7) {
1337       insn = next_insn(p, BRW_OPCODE_WHILE);
1338
1339       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1340       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1341       brw_set_src1(p, insn, brw_imm_ud(0));
1342       insn->bits3.break_cont.jip = br * (do_insn - insn);
1343
1344       insn->header.execution_size = do_insn->header.execution_size;
1345       assert(insn->header.execution_size == BRW_EXECUTE_8);
1346    } else if (intel->gen == 6) {
1347       insn = next_insn(p, BRW_OPCODE_WHILE);
1348
1349       brw_set_dest(p, insn, brw_imm_w(0));
1350       insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1351       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1352       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1353
1354       insn->header.execution_size = do_insn->header.execution_size;
1355       assert(insn->header.execution_size == BRW_EXECUTE_8);
1356    } else {
1357       if (p->single_program_flow) {
1358          insn = next_insn(p, BRW_OPCODE_ADD);
1359
1360          brw_set_dest(p, insn, brw_ip_reg());
1361          brw_set_src0(p, insn, brw_ip_reg());
1362          brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1363          insn->header.execution_size = BRW_EXECUTE_1;
1364       } else {
1365          insn = next_insn(p, BRW_OPCODE_WHILE);
1366
1367          assert(do_insn->header.opcode == BRW_OPCODE_DO);
1368
1369          brw_set_dest(p, insn, brw_ip_reg());
1370          brw_set_src0(p, insn, brw_ip_reg());
1371          brw_set_src1(p, insn, brw_imm_d(0));
1372
1373          insn->header.execution_size = do_insn->header.execution_size;
1374          insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1375          insn->bits3.if_else.pop_count = 0;
1376          insn->bits3.if_else.pad0 = 0;
1377       }
1378    }
1379    insn->header.compression_control = BRW_COMPRESSION_NONE;
1380    p->current->header.predicate_control = BRW_PREDICATE_NONE;
1381
1382    return insn;
1383 }
1384
1385
1386 /* FORWARD JUMPS:
1387  */
1388 void brw_land_fwd_jump(struct brw_compile *p, 
1389                        struct brw_instruction *jmp_insn)
1390 {
1391    struct intel_context *intel = &p->brw->intel;
1392    struct brw_instruction *landing = &p->store[p->nr_insn];
1393    GLuint jmpi = 1;
1394
1395    if (intel->gen >= 5)
1396       jmpi = 2;
1397
1398    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1399    assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1400
1401    jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1402 }
1403
1404
1405
1406 /* To integrate with the above, it makes sense that the comparison
1407  * instruction should populate the flag register.  It might be simpler
1408  * just to use the flag reg for most WM tasks?
1409  */
1410 void brw_CMP(struct brw_compile *p,
1411              struct brw_reg dest,
1412              GLuint conditional,
1413              struct brw_reg src0,
1414              struct brw_reg src1)
1415 {
1416    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1417
1418    insn->header.destreg__conditionalmod = conditional;
1419    brw_set_dest(p, insn, dest);
1420    brw_set_src0(p, insn, src0);
1421    brw_set_src1(p, insn, src1);
1422
1423 /*    guess_execution_size(insn, src0); */
1424
1425
1426    /* Make it so that future instructions will use the computed flag
1427     * value until brw_set_predicate_control_flag_value() is called
1428     * again.  
1429     */
1430    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1431        dest.nr == 0) {
1432       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1433       p->flag_value = 0xff;
1434    }
1435 }
1436
1437 /* Issue 'wait' instruction for n1, host could program MMIO
1438    to wake up thread. */
1439 void brw_WAIT (struct brw_compile *p)
1440 {
1441    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1442    struct brw_reg src = brw_notification_1_reg();
1443
1444    brw_set_dest(p, insn, src);
1445    brw_set_src0(p, insn, src);
1446    brw_set_src1(p, insn, brw_null_reg());
1447    insn->header.execution_size = 0; /* must */
1448    insn->header.predicate_control = 0;
1449    insn->header.compression_control = 0;
1450 }
1451
1452
1453 /***********************************************************************
1454  * Helpers for the various SEND message types:
1455  */
1456
1457 /** Extended math function, float[8].
1458  */
1459 void brw_math( struct brw_compile *p,
1460                struct brw_reg dest,
1461                GLuint function,
1462                GLuint saturate,
1463                GLuint msg_reg_nr,
1464                struct brw_reg src,
1465                GLuint data_type,
1466                GLuint precision )
1467 {
1468    struct intel_context *intel = &p->brw->intel;
1469
1470    if (intel->gen >= 6) {
1471       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1472
1473       assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1474       assert(src.file == BRW_GENERAL_REGISTER_FILE);
1475
1476       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1477       assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1478
1479       /* Source modifiers are ignored for extended math instructions. */
1480       assert(!src.negate);
1481       assert(!src.abs);
1482
1483       if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1484           function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1485          assert(src.type == BRW_REGISTER_TYPE_F);
1486       }
1487
1488       /* Math is the same ISA format as other opcodes, except that CondModifier
1489        * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1490        */
1491       insn->header.destreg__conditionalmod = function;
1492       insn->header.saturate = saturate;
1493
1494       brw_set_dest(p, insn, dest);
1495       brw_set_src0(p, insn, src);
1496       brw_set_src1(p, insn, brw_null_reg());
1497    } else {
1498       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1499       GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1500       GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1501       /* Example code doesn't set predicate_control for send
1502        * instructions.
1503        */
1504       insn->header.predicate_control = 0;
1505       insn->header.destreg__conditionalmod = msg_reg_nr;
1506
1507       brw_set_dest(p, insn, dest);
1508       brw_set_src0(p, insn, src);
1509       brw_set_math_message(p,
1510                            insn,
1511                            msg_length, response_length,
1512                            function,
1513                            BRW_MATH_INTEGER_UNSIGNED,
1514                            precision,
1515                            saturate,
1516                            data_type);
1517    }
1518 }
1519
1520 /** Extended math function, float[8].
1521  */
1522 void brw_math2(struct brw_compile *p,
1523                struct brw_reg dest,
1524                GLuint function,
1525                struct brw_reg src0,
1526                struct brw_reg src1)
1527 {
1528    struct intel_context *intel = &p->brw->intel;
1529    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1530
1531    assert(intel->gen >= 6);
1532    (void) intel;
1533
1534
1535    assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1536    assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1537    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1538
1539    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1540    assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1541    assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1542
1543    if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1544        function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1545       assert(src0.type == BRW_REGISTER_TYPE_F);
1546       assert(src1.type == BRW_REGISTER_TYPE_F);
1547    }
1548
1549    /* Source modifiers are ignored for extended math instructions. */
1550    assert(!src0.negate);
1551    assert(!src0.abs);
1552    assert(!src1.negate);
1553    assert(!src1.abs);
1554
1555    /* Math is the same ISA format as other opcodes, except that CondModifier
1556     * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1557     */
1558    insn->header.destreg__conditionalmod = function;
1559
1560    brw_set_dest(p, insn, dest);
1561    brw_set_src0(p, insn, src0);
1562    brw_set_src1(p, insn, src1);
1563 }
1564
1565 /**
1566  * Extended math function, float[16].
1567  * Use 2 send instructions.
1568  */
1569 void brw_math_16( struct brw_compile *p,
1570                   struct brw_reg dest,
1571                   GLuint function,
1572                   GLuint saturate,
1573                   GLuint msg_reg_nr,
1574                   struct brw_reg src,
1575                   GLuint precision )
1576 {
1577    struct intel_context *intel = &p->brw->intel;
1578    struct brw_instruction *insn;
1579    GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
1580    GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
1581
1582    if (intel->gen >= 6) {
1583       insn = next_insn(p, BRW_OPCODE_MATH);
1584
1585       /* Math is the same ISA format as other opcodes, except that CondModifier
1586        * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1587        */
1588       insn->header.destreg__conditionalmod = function;
1589       insn->header.saturate = saturate;
1590
1591       /* Source modifiers are ignored for extended math instructions. */
1592       assert(!src.negate);
1593       assert(!src.abs);
1594
1595       brw_set_dest(p, insn, dest);
1596       brw_set_src0(p, insn, src);
1597       brw_set_src1(p, insn, brw_null_reg());
1598       return;
1599    }
1600
1601    /* First instruction:
1602     */
1603    brw_push_insn_state(p);
1604    brw_set_predicate_control_flag_value(p, 0xff);
1605    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1606
1607    insn = next_insn(p, BRW_OPCODE_SEND);
1608    insn->header.destreg__conditionalmod = msg_reg_nr;
1609
1610    brw_set_dest(p, insn, dest);
1611    brw_set_src0(p, insn, src);
1612    brw_set_math_message(p,
1613                         insn, 
1614                         msg_length, response_length, 
1615                         function,
1616                         BRW_MATH_INTEGER_UNSIGNED,
1617                         precision,
1618                         saturate,
1619                         BRW_MATH_DATA_VECTOR);
1620
1621    /* Second instruction:
1622     */
1623    insn = next_insn(p, BRW_OPCODE_SEND);
1624    insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1625    insn->header.destreg__conditionalmod = msg_reg_nr+1;
1626
1627    brw_set_dest(p, insn, offset(dest,1));
1628    brw_set_src0(p, insn, src);
1629    brw_set_math_message(p, 
1630                         insn, 
1631                         msg_length, response_length, 
1632                         function,
1633                         BRW_MATH_INTEGER_UNSIGNED,
1634                         precision,
1635                         saturate,
1636                         BRW_MATH_DATA_VECTOR);
1637
1638    brw_pop_insn_state(p);
1639 }
1640
1641
1642 /**
1643  * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1644  * using a constant offset per channel.
1645  *
1646  * The offset must be aligned to oword size (16 bytes).  Used for
1647  * register spilling.
1648  */
1649 void brw_oword_block_write_scratch(struct brw_compile *p,
1650                                    struct brw_reg mrf,
1651                                    int num_regs,
1652                                    GLuint offset)
1653 {
1654    struct intel_context *intel = &p->brw->intel;
1655    uint32_t msg_control, msg_type;
1656    int mlen;
1657
1658    if (intel->gen >= 6)
1659       offset /= 16;
1660
1661    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1662
1663    if (num_regs == 1) {
1664       msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1665       mlen = 2;
1666    } else {
1667       msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1668       mlen = 3;
1669    }
1670
1671    /* Set up the message header.  This is g0, with g0.2 filled with
1672     * the offset.  We don't want to leave our offset around in g0 or
1673     * it'll screw up texture samples, so set it up inside the message
1674     * reg.
1675     */
1676    {
1677       brw_push_insn_state(p);
1678       brw_set_mask_control(p, BRW_MASK_DISABLE);
1679       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1680
1681       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1682
1683       /* set message header global offset field (reg 0, element 2) */
1684       brw_MOV(p,
1685               retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1686                                   mrf.nr,
1687                                   2), BRW_REGISTER_TYPE_UD),
1688               brw_imm_ud(offset));
1689
1690       brw_pop_insn_state(p);
1691    }
1692
1693    {
1694       struct brw_reg dest;
1695       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1696       int send_commit_msg;
1697       struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1698                                          BRW_REGISTER_TYPE_UW);
1699
1700       if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1701          insn->header.compression_control = BRW_COMPRESSION_NONE;
1702          src_header = vec16(src_header);
1703       }
1704       assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1705       insn->header.destreg__conditionalmod = mrf.nr;
1706
1707       /* Until gen6, writes followed by reads from the same location
1708        * are not guaranteed to be ordered unless write_commit is set.
1709        * If set, then a no-op write is issued to the destination
1710        * register to set a dependency, and a read from the destination
1711        * can be used to ensure the ordering.
1712        *
1713        * For gen6, only writes between different threads need ordering
1714        * protection.  Our use of DP writes is all about register
1715        * spilling within a thread.
1716        */
1717       if (intel->gen >= 6) {
1718          dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1719          send_commit_msg = 0;
1720       } else {
1721          dest = src_header;
1722          send_commit_msg = 1;
1723       }
1724
1725       brw_set_dest(p, insn, dest);
1726       if (intel->gen >= 6) {
1727          brw_set_src0(p, insn, mrf);
1728       } else {
1729          brw_set_src0(p, insn, brw_null_reg());
1730       }
1731
1732       if (intel->gen >= 6)
1733          msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1734       else
1735          msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1736
1737       brw_set_dp_write_message(p,
1738                                insn,
1739                                255, /* binding table index (255=stateless) */
1740                                msg_control,
1741                                msg_type,
1742                                mlen,
1743                                GL_TRUE, /* header_present */
1744                                0, /* pixel scoreboard */
1745                                send_commit_msg, /* response_length */
1746                                0, /* eot */
1747                                send_commit_msg);
1748    }
1749 }
1750
1751
1752 /**
1753  * Read a block of owords (half a GRF each) from the scratch buffer
1754  * using a constant index per channel.
1755  *
1756  * Offset must be aligned to oword size (16 bytes).  Used for register
1757  * spilling.
1758  */
1759 void
1760 brw_oword_block_read_scratch(struct brw_compile *p,
1761                              struct brw_reg dest,
1762                              struct brw_reg mrf,
1763                              int num_regs,
1764                              GLuint offset)
1765 {
1766    struct intel_context *intel = &p->brw->intel;
1767    uint32_t msg_control;
1768    int rlen;
1769
1770    if (intel->gen >= 6)
1771       offset /= 16;
1772
1773    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1774    dest = retype(dest, BRW_REGISTER_TYPE_UW);
1775
1776    if (num_regs == 1) {
1777       msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1778       rlen = 1;
1779    } else {
1780       msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1781       rlen = 2;
1782    }
1783
1784    {
1785       brw_push_insn_state(p);
1786       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1787       brw_set_mask_control(p, BRW_MASK_DISABLE);
1788
1789       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1790
1791       /* set message header global offset field (reg 0, element 2) */
1792       brw_MOV(p,
1793               retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1794                                   mrf.nr,
1795                                   2), BRW_REGISTER_TYPE_UD),
1796               brw_imm_ud(offset));
1797
1798       brw_pop_insn_state(p);
1799    }
1800
1801    {
1802       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1803
1804       assert(insn->header.predicate_control == 0);
1805       insn->header.compression_control = BRW_COMPRESSION_NONE;
1806       insn->header.destreg__conditionalmod = mrf.nr;
1807
1808       brw_set_dest(p, insn, dest);      /* UW? */
1809       if (intel->gen >= 6) {
1810          brw_set_src0(p, insn, mrf);
1811       } else {
1812          brw_set_src0(p, insn, brw_null_reg());
1813       }
1814
1815       brw_set_dp_read_message(p,
1816                               insn,
1817                               255, /* binding table index (255=stateless) */
1818                               msg_control,
1819                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1820                               BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1821                               1, /* msg_length */
1822                               rlen);
1823    }
1824 }
1825
1826 /**
1827  * Read a float[4] vector from the data port Data Cache (const buffer).
1828  * Location (in buffer) should be a multiple of 16.
1829  * Used for fetching shader constants.
1830  */
1831 void brw_oword_block_read(struct brw_compile *p,
1832                           struct brw_reg dest,
1833                           struct brw_reg mrf,
1834                           uint32_t offset,
1835                           uint32_t bind_table_index)
1836 {
1837    struct intel_context *intel = &p->brw->intel;
1838
1839    /* On newer hardware, offset is in units of owords. */
1840    if (intel->gen >= 6)
1841       offset /= 16;
1842
1843    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1844
1845    brw_push_insn_state(p);
1846    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1847    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1848    brw_set_mask_control(p, BRW_MASK_DISABLE);
1849
1850    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1851
1852    /* set message header global offset field (reg 0, element 2) */
1853    brw_MOV(p,
1854            retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1855                                mrf.nr,
1856                                2), BRW_REGISTER_TYPE_UD),
1857            brw_imm_ud(offset));
1858
1859    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1860    insn->header.destreg__conditionalmod = mrf.nr;
1861
1862    /* cast dest to a uword[8] vector */
1863    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1864
1865    brw_set_dest(p, insn, dest);
1866    if (intel->gen >= 6) {
1867       brw_set_src0(p, insn, mrf);
1868    } else {
1869       brw_set_src0(p, insn, brw_null_reg());
1870    }
1871
1872    brw_set_dp_read_message(p,
1873                            insn,
1874                            bind_table_index,
1875                            BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1876                            BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1877                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1878                            1, /* msg_length */
1879                            1); /* response_length (1 reg, 2 owords!) */
1880
1881    brw_pop_insn_state(p);
1882 }
1883
1884 /**
1885  * Read a set of dwords from the data port Data Cache (const buffer).
1886  *
1887  * Location (in buffer) appears as UD offsets in the register after
1888  * the provided mrf header reg.
1889  */
1890 void brw_dword_scattered_read(struct brw_compile *p,
1891                               struct brw_reg dest,
1892                               struct brw_reg mrf,
1893                               uint32_t bind_table_index)
1894 {
1895    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1896
1897    brw_push_insn_state(p);
1898    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1899    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1900    brw_set_mask_control(p, BRW_MASK_DISABLE);
1901    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1902    brw_pop_insn_state(p);
1903
1904    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1905    insn->header.destreg__conditionalmod = mrf.nr;
1906
1907    /* cast dest to a uword[8] vector */
1908    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1909
1910    brw_set_dest(p, insn, dest);
1911    brw_set_src0(p, insn, brw_null_reg());
1912
1913    brw_set_dp_read_message(p,
1914                            insn,
1915                            bind_table_index,
1916                            BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1917                            BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1918                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1919                            2, /* msg_length */
1920                            1); /* response_length */
1921 }
1922
1923
1924
1925 /**
1926  * Read float[4] constant(s) from VS constant buffer.
1927  * For relative addressing, two float[4] constants will be read into 'dest'.
1928  * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1929  */
1930 void brw_dp_READ_4_vs(struct brw_compile *p,
1931                       struct brw_reg dest,
1932                       GLuint location,
1933                       GLuint bind_table_index)
1934 {
1935    struct intel_context *intel = &p->brw->intel;
1936    struct brw_instruction *insn;
1937    GLuint msg_reg_nr = 1;
1938
1939    if (intel->gen >= 6)
1940       location /= 16;
1941
1942    /* Setup MRF[1] with location/offset into const buffer */
1943    brw_push_insn_state(p);
1944    brw_set_access_mode(p, BRW_ALIGN_1);
1945    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1946    brw_set_mask_control(p, BRW_MASK_DISABLE);
1947    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1948    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
1949                      BRW_REGISTER_TYPE_UD),
1950            brw_imm_ud(location));
1951    brw_pop_insn_state(p);
1952
1953    insn = next_insn(p, BRW_OPCODE_SEND);
1954
1955    insn->header.predicate_control = BRW_PREDICATE_NONE;
1956    insn->header.compression_control = BRW_COMPRESSION_NONE;
1957    insn->header.destreg__conditionalmod = msg_reg_nr;
1958    insn->header.mask_control = BRW_MASK_DISABLE;
1959
1960    brw_set_dest(p, insn, dest);
1961    if (intel->gen >= 6) {
1962       brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1963    } else {
1964       brw_set_src0(p, insn, brw_null_reg());
1965    }
1966
1967    brw_set_dp_read_message(p,
1968                            insn,
1969                            bind_table_index,
1970                            0,
1971                            BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1972                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1973                            1, /* msg_length */
1974                            1); /* response_length (1 Oword) */
1975 }
1976
1977 /**
1978  * Read a float[4] constant per vertex from VS constant buffer, with
1979  * relative addressing.
1980  */
1981 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1982                                struct brw_reg dest,
1983                                struct brw_reg addr_reg,
1984                                GLuint offset,
1985                                GLuint bind_table_index)
1986 {
1987    struct intel_context *intel = &p->brw->intel;
1988    struct brw_reg src = brw_vec8_grf(0, 0);
1989    int msg_type;
1990
1991    /* Setup MRF[1] with offset into const buffer */
1992    brw_push_insn_state(p);
1993    brw_set_access_mode(p, BRW_ALIGN_1);
1994    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1995    brw_set_mask_control(p, BRW_MASK_DISABLE);
1996    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1997
1998    /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1999     * fields ignored.
2000     */
2001    brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
2002            addr_reg, brw_imm_d(offset));
2003    brw_pop_insn_state(p);
2004
2005    gen6_resolve_implied_move(p, &src, 0);
2006    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2007
2008    insn->header.predicate_control = BRW_PREDICATE_NONE;
2009    insn->header.compression_control = BRW_COMPRESSION_NONE;
2010    insn->header.destreg__conditionalmod = 0;
2011    insn->header.mask_control = BRW_MASK_DISABLE;
2012
2013    brw_set_dest(p, insn, dest);
2014    brw_set_src0(p, insn, src);
2015
2016    if (intel->gen >= 6)
2017       msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2018    else if (intel->gen == 5 || intel->is_g4x)
2019       msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2020    else
2021       msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2022
2023    brw_set_dp_read_message(p,
2024                            insn,
2025                            bind_table_index,
2026                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2027                            msg_type,
2028                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2029                            2, /* msg_length */
2030                            1); /* response_length */
2031 }
2032
2033
2034
2035 void brw_fb_WRITE(struct brw_compile *p,
2036                   int dispatch_width,
2037                   GLuint msg_reg_nr,
2038                   struct brw_reg src0,
2039                   GLuint binding_table_index,
2040                   GLuint msg_length,
2041                   GLuint response_length,
2042                   GLboolean eot,
2043                   GLboolean header_present)
2044 {
2045    struct intel_context *intel = &p->brw->intel;
2046    struct brw_instruction *insn;
2047    GLuint msg_control, msg_type;
2048    struct brw_reg dest;
2049
2050    if (dispatch_width == 16)
2051       dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2052    else
2053       dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2054
2055    if (intel->gen >= 6 && binding_table_index == 0) {
2056       insn = next_insn(p, BRW_OPCODE_SENDC);
2057    } else {
2058       insn = next_insn(p, BRW_OPCODE_SEND);
2059    }
2060    /* The execution mask is ignored for render target writes. */
2061    insn->header.predicate_control = 0;
2062    insn->header.compression_control = BRW_COMPRESSION_NONE;
2063
2064    if (intel->gen >= 6) {
2065       /* headerless version, just submit color payload */
2066       src0 = brw_message_reg(msg_reg_nr);
2067
2068       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2069    } else {
2070       insn->header.destreg__conditionalmod = msg_reg_nr;
2071
2072       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2073    }
2074
2075    if (dispatch_width == 16)
2076       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
2077    else
2078       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
2079
2080    brw_set_dest(p, insn, dest);
2081    brw_set_src0(p, insn, src0);
2082    brw_set_dp_write_message(p,
2083                             insn,
2084                             binding_table_index,
2085                             msg_control,
2086                             msg_type,
2087                             msg_length,
2088                             header_present,
2089                             1,  /* pixel scoreboard */
2090                             response_length,
2091                             eot,
2092                             0 /* send_commit_msg */);
2093 }
2094
2095
2096 /**
2097  * Texture sample instruction.
2098  * Note: the msg_type plus msg_length values determine exactly what kind
2099  * of sampling operation is performed.  See volume 4, page 161 of docs.
2100  */
2101 void brw_SAMPLE(struct brw_compile *p,
2102                 struct brw_reg dest,
2103                 GLuint msg_reg_nr,
2104                 struct brw_reg src0,
2105                 GLuint binding_table_index,
2106                 GLuint sampler,
2107                 GLuint writemask,
2108                 GLuint msg_type,
2109                 GLuint response_length,
2110                 GLuint msg_length,
2111                 GLboolean eot,
2112                 GLuint header_present,
2113                 GLuint simd_mode)
2114 {
2115    struct intel_context *intel = &p->brw->intel;
2116    GLboolean need_stall = 0;
2117
2118    if (writemask == 0) {
2119       /*printf("%s: zero writemask??\n", __FUNCTION__); */
2120       return;
2121    }
2122    
2123    /* Hardware doesn't do destination dependency checking on send
2124     * instructions properly.  Add a workaround which generates the
2125     * dependency by other means.  In practice it seems like this bug
2126     * only crops up for texture samples, and only where registers are
2127     * written by the send and then written again later without being
2128     * read in between.  Luckily for us, we already track that
2129     * information and use it to modify the writemask for the
2130     * instruction, so that is a guide for whether a workaround is
2131     * needed.
2132     */
2133    if (writemask != WRITEMASK_XYZW) {
2134       GLuint dst_offset = 0;
2135       GLuint i, newmask = 0, len = 0;
2136
2137       for (i = 0; i < 4; i++) {
2138          if (writemask & (1<<i))
2139             break;
2140          dst_offset += 2;
2141       }
2142       for (; i < 4; i++) {
2143          if (!(writemask & (1<<i)))
2144             break;
2145          newmask |= 1<<i;
2146          len++;
2147       }
2148
2149       if (newmask != writemask) {
2150          need_stall = 1;
2151          /* printf("need stall %x %x\n", newmask , writemask); */
2152       }
2153       else {
2154          GLboolean dispatch_16 = GL_FALSE;
2155
2156          struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2157
2158          guess_execution_size(p, p->current, dest);
2159          if (p->current->header.execution_size == BRW_EXECUTE_16)
2160             dispatch_16 = GL_TRUE;
2161
2162          newmask = ~newmask & WRITEMASK_XYZW;
2163
2164          brw_push_insn_state(p);
2165
2166          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2167          brw_set_mask_control(p, BRW_MASK_DISABLE);
2168
2169          brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2170                  retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2171          brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
2172
2173          brw_pop_insn_state(p);
2174
2175          src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
2176          dest = offset(dest, dst_offset);
2177
2178          /* For 16-wide dispatch, masked channels are skipped in the
2179           * response.  For 8-wide, masked channels still take up slots,
2180           * and are just not written to.
2181           */
2182          if (dispatch_16)
2183             response_length = len * 2;
2184       }
2185    }
2186
2187    {
2188       struct brw_instruction *insn;
2189    
2190       gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2191
2192       insn = next_insn(p, BRW_OPCODE_SEND);
2193       insn->header.predicate_control = 0; /* XXX */
2194       insn->header.compression_control = BRW_COMPRESSION_NONE;
2195       if (intel->gen < 6)
2196           insn->header.destreg__conditionalmod = msg_reg_nr;
2197
2198       brw_set_dest(p, insn, dest);
2199       brw_set_src0(p, insn, src0);
2200       brw_set_sampler_message(p, insn,
2201                               binding_table_index,
2202                               sampler,
2203                               msg_type,
2204                               response_length, 
2205                               msg_length,
2206                               eot,
2207                               header_present,
2208                               simd_mode);
2209    }
2210
2211    if (need_stall) {
2212       struct brw_reg reg = vec8(offset(dest, response_length-1));
2213
2214       /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
2215        */
2216       brw_push_insn_state(p);
2217       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2218       brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2219               retype(reg, BRW_REGISTER_TYPE_UD));
2220       brw_pop_insn_state(p);
2221    }
2222
2223 }
2224
2225 /* All these variables are pretty confusing - we might be better off
2226  * using bitmasks and macros for this, in the old style.  Or perhaps
2227  * just having the caller instantiate the fields in dword3 itself.
2228  */
2229 void brw_urb_WRITE(struct brw_compile *p,
2230                    struct brw_reg dest,
2231                    GLuint msg_reg_nr,
2232                    struct brw_reg src0,
2233                    GLboolean allocate,
2234                    GLboolean used,
2235                    GLuint msg_length,
2236                    GLuint response_length,
2237                    GLboolean eot,
2238                    GLboolean writes_complete,
2239                    GLuint offset,
2240                    GLuint swizzle)
2241 {
2242    struct intel_context *intel = &p->brw->intel;
2243    struct brw_instruction *insn;
2244
2245    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2246
2247    if (intel->gen == 7) {
2248       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2249       brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2250                        BRW_REGISTER_TYPE_UD),
2251                 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2252                 brw_imm_ud(0xff00));
2253    }
2254
2255    insn = next_insn(p, BRW_OPCODE_SEND);
2256
2257    assert(msg_length < BRW_MAX_MRF);
2258
2259    brw_set_dest(p, insn, dest);
2260    brw_set_src0(p, insn, src0);
2261    brw_set_src1(p, insn, brw_imm_d(0));
2262
2263    if (intel->gen < 6)
2264       insn->header.destreg__conditionalmod = msg_reg_nr;
2265
2266    brw_set_urb_message(p,
2267                        insn,
2268                        allocate,
2269                        used,
2270                        msg_length,
2271                        response_length, 
2272                        eot, 
2273                        writes_complete, 
2274                        offset,
2275                        swizzle);
2276 }
2277
2278 static int
2279 brw_find_next_block_end(struct brw_compile *p, int start)
2280 {
2281    int ip;
2282
2283    for (ip = start + 1; ip < p->nr_insn; ip++) {
2284       struct brw_instruction *insn = &p->store[ip];
2285
2286       switch (insn->header.opcode) {
2287       case BRW_OPCODE_ENDIF:
2288       case BRW_OPCODE_ELSE:
2289       case BRW_OPCODE_WHILE:
2290          return ip;
2291       }
2292    }
2293    assert(!"not reached");
2294    return start + 1;
2295 }
2296
2297 /* There is no DO instruction on gen6, so to find the end of the loop
2298  * we have to see if the loop is jumping back before our start
2299  * instruction.
2300  */
2301 static int
2302 brw_find_loop_end(struct brw_compile *p, int start)
2303 {
2304    struct intel_context *intel = &p->brw->intel;
2305    int ip;
2306    int br = 2;
2307
2308    for (ip = start + 1; ip < p->nr_insn; ip++) {
2309       struct brw_instruction *insn = &p->store[ip];
2310
2311       if (insn->header.opcode == BRW_OPCODE_WHILE) {
2312          int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2313                                    : insn->bits3.break_cont.jip;
2314          if (ip + jip / br < start)
2315             return ip;
2316       }
2317    }
2318    assert(!"not reached");
2319    return start + 1;
2320 }
2321
2322 /* After program generation, go back and update the UIP and JIP of
2323  * BREAK and CONT instructions to their correct locations.
2324  */
2325 void
2326 brw_set_uip_jip(struct brw_compile *p)
2327 {
2328    struct intel_context *intel = &p->brw->intel;
2329    int ip;
2330    int br = 2;
2331
2332    if (intel->gen < 6)
2333       return;
2334
2335    for (ip = 0; ip < p->nr_insn; ip++) {
2336       struct brw_instruction *insn = &p->store[ip];
2337
2338       switch (insn->header.opcode) {
2339       case BRW_OPCODE_BREAK:
2340          insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2341          /* Gen7 UIP points to WHILE; Gen6 points just after it */
2342          insn->bits3.break_cont.uip =
2343             br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2344          break;
2345       case BRW_OPCODE_CONTINUE:
2346          insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2347          insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2348
2349          assert(insn->bits3.break_cont.uip != 0);
2350          assert(insn->bits3.break_cont.jip != 0);
2351          break;
2352       }
2353    }
2354 }
2355
2356 void brw_ff_sync(struct brw_compile *p,
2357                    struct brw_reg dest,
2358                    GLuint msg_reg_nr,
2359                    struct brw_reg src0,
2360                    GLboolean allocate,
2361                    GLuint response_length,
2362                    GLboolean eot)
2363 {
2364    struct intel_context *intel = &p->brw->intel;
2365    struct brw_instruction *insn;
2366
2367    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2368
2369    insn = next_insn(p, BRW_OPCODE_SEND);
2370    brw_set_dest(p, insn, dest);
2371    brw_set_src0(p, insn, src0);
2372    brw_set_src1(p, insn, brw_imm_d(0));
2373
2374    if (intel->gen < 6)
2375       insn->header.destreg__conditionalmod = msg_reg_nr;
2376
2377    brw_set_ff_sync_message(p,
2378                            insn,
2379                            allocate,
2380                            response_length,
2381                            eot);
2382 }