Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / gallium / drivers / i965 / brw_eu_emit.c
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4  develop this 3D driver.
5  
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13  
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17  
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keith@tungstengraphics.com>
30   */
31      
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36 #include "brw_debug.h"
37 #include "brw_disasm.h"
38
39
40
41
42 /***********************************************************************
43  * Internal helper for constructing instructions
44  */
45
46 static void guess_execution_size( struct brw_instruction *insn,
47                                   struct brw_reg reg )
48 {
49    if (reg.width == BRW_WIDTH_8 && 
50        insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 
51       insn->header.execution_size = BRW_EXECUTE_16;
52    else
53       insn->header.execution_size = reg.width;  /* note - definitions are compatible */
54 }
55
56
57 static void brw_set_dest( struct brw_instruction *insn,
58                           struct brw_reg dest )
59 {
60    if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
61       assert(dest.nr < 128);
62
63    insn->bits1.da1.dest_reg_file = dest.file;
64    insn->bits1.da1.dest_reg_type = dest.type;
65    insn->bits1.da1.dest_address_mode = dest.address_mode;
66
67    if (dest.address_mode == BRW_ADDRESS_DIRECT) {   
68       insn->bits1.da1.dest_reg_nr = dest.nr;
69
70       if (insn->header.access_mode == BRW_ALIGN_1) {
71          insn->bits1.da1.dest_subreg_nr = dest.subnr;
72          if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
73             dest.hstride = BRW_HORIZONTAL_STRIDE_1;
74          insn->bits1.da1.dest_horiz_stride = dest.hstride;
75       }
76       else {
77          insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
78          insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
79       }
80    }
81    else {
82       insn->bits1.ia1.dest_subreg_nr = dest.subnr;
83
84       /* These are different sizes in align1 vs align16:
85        */
86       if (insn->header.access_mode == BRW_ALIGN_1) {
87          insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
88          if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
89             dest.hstride = BRW_HORIZONTAL_STRIDE_1;
90          insn->bits1.ia1.dest_horiz_stride = dest.hstride;
91       }
92       else {
93          insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
94       }
95    }
96
97    /* NEW: Set the execution size based on dest.width and
98     * insn->compression_control:
99     */
100    guess_execution_size(insn, dest);
101 }
102
103 static void brw_set_src0( struct brw_instruction *insn,
104                           struct brw_reg reg )
105 {
106    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
107
108    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
109       assert(reg.nr < 128);
110
111    insn->bits1.da1.src0_reg_file = reg.file;
112    insn->bits1.da1.src0_reg_type = reg.type;
113    insn->bits2.da1.src0_abs = reg.abs;
114    insn->bits2.da1.src0_negate = reg.negate;
115    insn->bits2.da1.src0_address_mode = reg.address_mode;
116
117    if (reg.file == BRW_IMMEDIATE_VALUE) {
118       insn->bits3.ud = reg.dw1.ud;
119    
120       /* Required to set some fields in src1 as well:
121        */
122       insn->bits1.da1.src1_reg_file = 0; /* arf */
123       insn->bits1.da1.src1_reg_type = reg.type;
124    }
125    else 
126    {
127       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
128          if (insn->header.access_mode == BRW_ALIGN_1) {
129             insn->bits2.da1.src0_subreg_nr = reg.subnr;
130             insn->bits2.da1.src0_reg_nr = reg.nr;
131          }
132          else {
133             insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
134             insn->bits2.da16.src0_reg_nr = reg.nr;
135          }
136       }
137       else {
138          insn->bits2.ia1.src0_subreg_nr = reg.subnr;
139
140          if (insn->header.access_mode == BRW_ALIGN_1) {
141             insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
142          }
143          else {
144             insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
145          }
146       }
147
148       if (insn->header.access_mode == BRW_ALIGN_1) {
149          if (reg.width == BRW_WIDTH_1 && 
150              insn->header.execution_size == BRW_EXECUTE_1) {
151             insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
152             insn->bits2.da1.src0_width = BRW_WIDTH_1;
153             insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
154          }
155          else {
156             insn->bits2.da1.src0_horiz_stride = reg.hstride;
157             insn->bits2.da1.src0_width = reg.width;
158             insn->bits2.da1.src0_vert_stride = reg.vstride;
159          }
160       }
161       else {
162          insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
163          insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
164          insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
165          insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
166
167          /* This is an oddity of the fact we're using the same
168           * descriptions for registers in align_16 as align_1:
169           */
170          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
171             insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
172          else
173             insn->bits2.da16.src0_vert_stride = reg.vstride;
174       }
175    }
176 }
177
178
179 void brw_set_src1( struct brw_instruction *insn,
180                    struct brw_reg reg )
181 {
182    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
183
184    assert(reg.nr < 128);
185
186    insn->bits1.da1.src1_reg_file = reg.file;
187    insn->bits1.da1.src1_reg_type = reg.type;
188    insn->bits3.da1.src1_abs = reg.abs;
189    insn->bits3.da1.src1_negate = reg.negate;
190
191    /* Only src1 can be immediate in two-argument instructions.
192     */
193    assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
194
195    if (reg.file == BRW_IMMEDIATE_VALUE) {
196       insn->bits3.ud = reg.dw1.ud;
197    }
198    else {
199       /* This is a hardware restriction, which may or may not be lifted
200        * in the future:
201        */
202       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
203       /*assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
204
205       if (insn->header.access_mode == BRW_ALIGN_1) {
206          insn->bits3.da1.src1_subreg_nr = reg.subnr;
207          insn->bits3.da1.src1_reg_nr = reg.nr;
208       }
209       else {
210          insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
211          insn->bits3.da16.src1_reg_nr = reg.nr;
212       }
213
214       if (insn->header.access_mode == BRW_ALIGN_1) {
215          if (reg.width == BRW_WIDTH_1 && 
216              insn->header.execution_size == BRW_EXECUTE_1) {
217             insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
218             insn->bits3.da1.src1_width = BRW_WIDTH_1;
219             insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
220          }
221          else {
222             insn->bits3.da1.src1_horiz_stride = reg.hstride;
223             insn->bits3.da1.src1_width = reg.width;
224             insn->bits3.da1.src1_vert_stride = reg.vstride;
225          }
226       }
227       else {
228          insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
229          insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
230          insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
231          insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
232
233          /* This is an oddity of the fact we're using the same
234           * descriptions for registers in align_16 as align_1:
235           */
236          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
237             insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
238          else
239             insn->bits3.da16.src1_vert_stride = reg.vstride;
240       }
241    }
242 }
243
244
245
246 static void brw_set_math_message( struct brw_context *brw,
247                                   struct brw_instruction *insn,
248                                   GLuint msg_length,
249                                   GLuint response_length,
250                                   GLuint function,
251                                   GLuint integer_type,
252                                   GLboolean low_precision,
253                                   GLboolean saturate,
254                                   GLuint dataType )
255 {
256    brw_set_src1(insn, brw_imm_d(0));
257
258    if (brw->gen == 5) {
259        insn->bits3.math_gen5.function = function;
260        insn->bits3.math_gen5.int_type = integer_type;
261        insn->bits3.math_gen5.precision = low_precision;
262        insn->bits3.math_gen5.saturate = saturate;
263        insn->bits3.math_gen5.data_type = dataType;
264        insn->bits3.math_gen5.snapshot = 0;
265        insn->bits3.math_gen5.header_present = 0;
266        insn->bits3.math_gen5.response_length = response_length;
267        insn->bits3.math_gen5.msg_length = msg_length;
268        insn->bits3.math_gen5.end_of_thread = 0;
269        insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
270        insn->bits2.send_gen5.end_of_thread = 0;
271    } else {
272        insn->bits3.math.function = function;
273        insn->bits3.math.int_type = integer_type;
274        insn->bits3.math.precision = low_precision;
275        insn->bits3.math.saturate = saturate;
276        insn->bits3.math.data_type = dataType;
277        insn->bits3.math.response_length = response_length;
278        insn->bits3.math.msg_length = msg_length;
279        insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
280        insn->bits3.math.end_of_thread = 0;
281    }
282 }
283
284
285 static void brw_set_ff_sync_message( struct brw_context *brw,
286                                  struct brw_instruction *insn,
287                                  GLboolean allocate,
288                                  GLboolean used,
289                                  GLuint msg_length,
290                                  GLuint response_length,
291                                  GLboolean end_of_thread,
292                                  GLboolean complete,
293                                  GLuint offset,
294                                  GLuint swizzle_control )
295 {
296         brw_set_src1(insn, brw_imm_d(0));
297
298         insn->bits3.urb_gen5.opcode = 1;
299         insn->bits3.urb_gen5.offset = offset;
300         insn->bits3.urb_gen5.swizzle_control = swizzle_control;
301         insn->bits3.urb_gen5.allocate = allocate;
302         insn->bits3.urb_gen5.used = used;
303         insn->bits3.urb_gen5.complete = complete;
304         insn->bits3.urb_gen5.header_present = 1;
305         insn->bits3.urb_gen5.response_length = response_length;
306         insn->bits3.urb_gen5.msg_length = msg_length;
307         insn->bits3.urb_gen5.end_of_thread = end_of_thread;
308         insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
309         insn->bits2.send_gen5.end_of_thread = end_of_thread;
310 }
311
312 static void brw_set_urb_message( struct brw_context *brw,
313                                  struct brw_instruction *insn,
314                                  GLboolean allocate,
315                                  GLboolean used,
316                                  GLuint msg_length,
317                                  GLuint response_length,
318                                  GLboolean end_of_thread,
319                                  GLboolean complete,
320                                  GLuint offset,
321                                  GLuint swizzle_control )
322 {
323     brw_set_src1(insn, brw_imm_d(0));
324
325     if (brw->gen == 5) {
326         insn->bits3.urb_gen5.opcode = 0;        /* ? */
327         insn->bits3.urb_gen5.offset = offset;
328         insn->bits3.urb_gen5.swizzle_control = swizzle_control;
329         insn->bits3.urb_gen5.allocate = allocate;
330         insn->bits3.urb_gen5.used = used;       /* ? */
331         insn->bits3.urb_gen5.complete = complete;
332         insn->bits3.urb_gen5.header_present = 1;
333         insn->bits3.urb_gen5.response_length = response_length;
334         insn->bits3.urb_gen5.msg_length = msg_length;
335         insn->bits3.urb_gen5.end_of_thread = end_of_thread;
336         insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
337         insn->bits2.send_gen5.end_of_thread = end_of_thread;
338     } else {
339         insn->bits3.urb.opcode = 0;     /* ? */
340         insn->bits3.urb.offset = offset;
341         insn->bits3.urb.swizzle_control = swizzle_control;
342         insn->bits3.urb.allocate = allocate;
343         insn->bits3.urb.used = used;    /* ? */
344         insn->bits3.urb.complete = complete;
345         insn->bits3.urb.response_length = response_length;
346         insn->bits3.urb.msg_length = msg_length;
347         insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
348         insn->bits3.urb.end_of_thread = end_of_thread;
349     }
350 }
351
352 static void brw_set_dp_write_message( struct brw_context *brw,
353                                       struct brw_instruction *insn,
354                                       GLuint binding_table_index,
355                                       GLuint msg_control,
356                                       GLuint msg_type,
357                                       GLuint msg_length,
358                                       GLuint pixel_scoreboard_clear,
359                                       GLuint response_length,
360                                       GLuint end_of_thread )
361 {
362    brw_set_src1(insn, brw_imm_d(0));
363
364    if (brw->gen == 5) {
365        insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
366        insn->bits3.dp_write_gen5.msg_control = msg_control;
367        insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
368        insn->bits3.dp_write_gen5.msg_type = msg_type;
369        insn->bits3.dp_write_gen5.send_commit_msg = 0;
370        insn->bits3.dp_write_gen5.header_present = 1;
371        insn->bits3.dp_write_gen5.response_length = response_length;
372        insn->bits3.dp_write_gen5.msg_length = msg_length;
373        insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
374        insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
375        insn->bits2.send_gen5.end_of_thread = end_of_thread;
376    } else {
377        insn->bits3.dp_write.binding_table_index = binding_table_index;
378        insn->bits3.dp_write.msg_control = msg_control;
379        insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
380        insn->bits3.dp_write.msg_type = msg_type;
381        insn->bits3.dp_write.send_commit_msg = 0;
382        insn->bits3.dp_write.response_length = response_length;
383        insn->bits3.dp_write.msg_length = msg_length;
384        insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
385        insn->bits3.dp_write.end_of_thread = end_of_thread;
386    }
387 }
388
389 static void brw_set_dp_read_message( struct brw_context *brw,
390                                       struct brw_instruction *insn,
391                                       GLuint binding_table_index,
392                                       GLuint msg_control,
393                                       GLuint msg_type,
394                                       GLuint target_cache,
395                                       GLuint msg_length,
396                                       GLuint response_length,
397                                       GLuint end_of_thread )
398 {
399    brw_set_src1(insn, brw_imm_d(0));
400
401    if (brw->gen == 5) {
402        insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
403        insn->bits3.dp_read_gen5.msg_control = msg_control;
404        insn->bits3.dp_read_gen5.msg_type = msg_type;
405        insn->bits3.dp_read_gen5.target_cache = target_cache;
406        insn->bits3.dp_read_gen5.header_present = 1;
407        insn->bits3.dp_read_gen5.response_length = response_length;
408        insn->bits3.dp_read_gen5.msg_length = msg_length;
409        insn->bits3.dp_read_gen5.pad1 = 0;
410        insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
411        insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
412        insn->bits2.send_gen5.end_of_thread = end_of_thread;
413    } else {
414        insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
415        insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
416        insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
417        insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
418        insn->bits3.dp_read.response_length = response_length;  /*16:19*/
419        insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
420        insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
421        insn->bits3.dp_read.pad1 = 0;  /*28:30*/
422        insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
423    }
424 }
425
426 static void brw_set_sampler_message(struct brw_context *brw,
427                                     struct brw_instruction *insn,
428                                     GLuint binding_table_index,
429                                     GLuint sampler,
430                                     GLuint msg_type,
431                                     GLuint response_length,
432                                     GLuint msg_length,
433                                     GLboolean eot,
434                                     GLuint header_present,
435                                     GLuint simd_mode)
436 {
437    assert(eot == 0);
438    brw_set_src1(insn, brw_imm_d(0));
439
440    if (brw->gen == 5) {
441       insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
442       insn->bits3.sampler_gen5.sampler = sampler;
443       insn->bits3.sampler_gen5.msg_type = msg_type;
444       insn->bits3.sampler_gen5.simd_mode = simd_mode;
445       insn->bits3.sampler_gen5.header_present = header_present;
446       insn->bits3.sampler_gen5.response_length = response_length;
447       insn->bits3.sampler_gen5.msg_length = msg_length;
448       insn->bits3.sampler_gen5.end_of_thread = eot;
449       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
450       insn->bits2.send_gen5.end_of_thread = eot;
451    } else if (brw->is_g4x) {
452       insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
453       insn->bits3.sampler_g4x.sampler = sampler;
454       insn->bits3.sampler_g4x.msg_type = msg_type;
455       insn->bits3.sampler_g4x.response_length = response_length;
456       insn->bits3.sampler_g4x.msg_length = msg_length;
457       insn->bits3.sampler_g4x.end_of_thread = eot;
458       insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
459    } else {
460       insn->bits3.sampler.binding_table_index = binding_table_index;
461       insn->bits3.sampler.sampler = sampler;
462       insn->bits3.sampler.msg_type = msg_type;
463       insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
464       insn->bits3.sampler.response_length = response_length;
465       insn->bits3.sampler.msg_length = msg_length;
466       insn->bits3.sampler.end_of_thread = eot;
467       insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
468    }
469 }
470
471
472
473 static struct brw_instruction *next_insn( struct brw_compile *p, 
474                                           GLuint opcode )
475 {
476    struct brw_instruction *insn;
477
478    if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
479    {
480       if (p->nr_insn) 
481           brw_disasm_insn(stderr, &p->store[p->nr_insn-1], p->brw->gen);
482    }
483
484    assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
485
486    insn = &p->store[p->nr_insn++];
487    memcpy(insn, p->current, sizeof(*insn));
488
489    /* Reset this one-shot flag: 
490     */
491
492    if (p->current->header.destreg__conditionalmod) {
493       p->current->header.destreg__conditionalmod = 0;
494       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
495    }
496
497    insn->header.opcode = opcode;
498    return insn;
499 }
500
501
502 static struct brw_instruction *brw_alu1( struct brw_compile *p,
503                                          GLuint opcode,
504                                          struct brw_reg dest,
505                                          struct brw_reg src )
506 {
507    struct brw_instruction *insn = next_insn(p, opcode);
508    brw_set_dest(insn, dest);
509    brw_set_src0(insn, src);   
510    return insn;
511 }
512
513 static struct brw_instruction *brw_alu2(struct brw_compile *p,
514                                         GLuint opcode,
515                                         struct brw_reg dest,
516                                         struct brw_reg src0,
517                                         struct brw_reg src1 )
518 {
519    struct brw_instruction *insn = next_insn(p, opcode);   
520    brw_set_dest(insn, dest);
521    brw_set_src0(insn, src0);
522    brw_set_src1(insn, src1);
523    return insn;
524 }
525
526
527 /***********************************************************************
528  * Convenience routines.
529  */
/* ALU1(OP) defines the public emitter brw_OP(p, dest, src0), which
 * forwards to brw_alu1() with opcode BRW_OPCODE_OP.
 */
#define ALU1(OP)                                                 \
struct brw_instruction *brw_##OP(struct brw_compile *p,          \
                                 struct brw_reg dest,            \
                                 struct brw_reg src0)            \
{                                                                \
   struct brw_instruction *inst =                                \
      brw_alu1(p, BRW_OPCODE_##OP, dest, src0);                  \
   return inst;                                                  \
}
537
/* ALU2(OP) defines the public emitter brw_OP(p, dest, src0, src1),
 * which forwards to brw_alu2() with opcode BRW_OPCODE_OP.
 */
#define ALU2(OP)                                                 \
struct brw_instruction *brw_##OP(struct brw_compile *p,          \
                                 struct brw_reg dest,            \
                                 struct brw_reg src0,            \
                                 struct brw_reg src1)            \
{                                                                \
   struct brw_instruction *inst =                                \
      brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);            \
   return inst;                                                  \
}
546
547
548 ALU1(MOV)
549 ALU2(SEL)
550 ALU1(NOT)
551 ALU2(AND)
552 ALU2(OR)
553 ALU2(XOR)
554 ALU2(SHR)
555 ALU2(SHL)
556 ALU2(RSR)
557 ALU2(RSL)
558 ALU2(ASR)
559 ALU2(ADD)
560 ALU2(MUL)
561 ALU1(FRC)
562 ALU1(RNDD)
563 ALU1(RNDZ)
564 ALU2(MAC)
565 ALU2(MACH)
566 ALU1(LZD)
567 ALU2(DP4)
568 ALU2(DPH)
569 ALU2(DP3)
570 ALU2(DP2)
571 ALU2(LINE)
572
573
574
575
576 void brw_NOP(struct brw_compile *p)
577 {
578    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);   
579    brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
580    brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
581    brw_set_src1(insn, brw_imm_ud(0x0));
582 }
583
584
585
586
587
588 /***********************************************************************
589  * Comparisons, if/else/endif
590  */
591
592 struct brw_instruction *brw_JMPI(struct brw_compile *p, 
593                                  struct brw_reg dest,
594                                  struct brw_reg src0,
595                                  struct brw_reg src1)
596 {
597    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
598
599    insn->header.execution_size = 1;
600    insn->header.compression_control = BRW_COMPRESSION_NONE;
601    insn->header.mask_control = BRW_MASK_DISABLE;
602
603    p->current->header.predicate_control = BRW_PREDICATE_NONE;
604
605    return insn;
606 }
607
608 /* EU takes the value from the flag register and pushes it onto some
609  * sort of a stack (presumably merging with any flag value already on
610  * the stack).  Within an if block, the flags at the top of the stack
611  * control execution on each channel of the unit, eg. on each of the
612  * 16 pixel values in our wm programs.
613  *
614  * When the matching 'else' instruction is reached (presumably by
615  * countdown of the instruction count patched in by our ELSE/ENDIF
616  * functions), the relevant flags are inverted.
617  *
618  * When the matching 'endif' instruction is reached, the flags are
619  * popped off.  If the stack is now empty, normal execution resumes.
620  *
621  * No attempt is made to deal with stack overflow (14 elements?).
622  */
623 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
624 {
625    struct brw_instruction *insn;
626
627    if (p->single_program_flow) {
628       assert(execute_size == BRW_EXECUTE_1);
629
630       insn = next_insn(p, BRW_OPCODE_ADD);
631       insn->header.predicate_inverse = 1;
632    } else {
633       insn = next_insn(p, BRW_OPCODE_IF);
634    }
635
636    /* Override the defaults for this instruction:
637     */
638    brw_set_dest(insn, brw_ip_reg());
639    brw_set_src0(insn, brw_ip_reg());
640    brw_set_src1(insn, brw_imm_d(0x0));
641
642    insn->header.execution_size = execute_size;
643    insn->header.compression_control = BRW_COMPRESSION_NONE;
644    insn->header.predicate_control = BRW_PREDICATE_NORMAL;
645    insn->header.mask_control = BRW_MASK_ENABLE;
646    if (!p->single_program_flow)
647        insn->header.thread_control = BRW_THREAD_SWITCH;
648
649    p->current->header.predicate_control = BRW_PREDICATE_NONE;
650
651    return insn;
652 }
653
654
655 struct brw_instruction *brw_ELSE(struct brw_compile *p, 
656                                  struct brw_instruction *if_insn)
657 {
658    struct brw_instruction *insn;
659    GLuint br = 1;
660
661    if (p->brw->gen == 5)
662       br = 2;
663
664    if (p->single_program_flow) {
665       insn = next_insn(p, BRW_OPCODE_ADD);
666    } else {
667       insn = next_insn(p, BRW_OPCODE_ELSE);
668    }
669
670    brw_set_dest(insn, brw_ip_reg());
671    brw_set_src0(insn, brw_ip_reg());
672    brw_set_src1(insn, brw_imm_d(0x0));
673
674    insn->header.compression_control = BRW_COMPRESSION_NONE;
675    insn->header.execution_size = if_insn->header.execution_size;
676    insn->header.mask_control = BRW_MASK_ENABLE;
677    if (!p->single_program_flow)
678        insn->header.thread_control = BRW_THREAD_SWITCH;
679
680    /* Patch the if instruction to point at this instruction.
681     */
682    if (p->single_program_flow) {
683       assert(if_insn->header.opcode == BRW_OPCODE_ADD);
684
685       if_insn->bits3.ud = (insn - if_insn + 1) * 16;
686    } else {
687       assert(if_insn->header.opcode == BRW_OPCODE_IF);
688
689       if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
690       if_insn->bits3.if_else.pop_count = 0;
691       if_insn->bits3.if_else.pad0 = 0;
692    }
693
694    return insn;
695 }
696
697 void brw_ENDIF(struct brw_compile *p, 
698                struct brw_instruction *patch_insn)
699 {
700    GLuint br = 1;
701
702    if (p->brw->gen == 5)
703       br = 2; 
704  
705    if (p->single_program_flow) {
706       /* In single program flow mode, there's no need to execute an ENDIF,
707        * since we don't need to do any stack operations, and if we're executing
708        * currently, we want to just continue executing.
709        */
710       struct brw_instruction *next = &p->store[p->nr_insn];
711
712       assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
713
714       patch_insn->bits3.ud = (next - patch_insn) * 16;
715    } else {
716       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
717
718       brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
719       brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
720       brw_set_src1(insn, brw_imm_d(0x0));
721
722       insn->header.compression_control = BRW_COMPRESSION_NONE;
723       insn->header.execution_size = patch_insn->header.execution_size;
724       insn->header.mask_control = BRW_MASK_ENABLE;
725       insn->header.thread_control = BRW_THREAD_SWITCH;
726
727       assert(patch_insn->bits3.if_else.jump_count == 0);
728
729       /* Patch the if or else instructions to point at this or the next
730        * instruction respectively.
731        */
732       if (patch_insn->header.opcode == BRW_OPCODE_IF) {
733          /* Automagically turn it into an IFF:
734           */
735          patch_insn->header.opcode = BRW_OPCODE_IFF;
736          patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
737          patch_insn->bits3.if_else.pop_count = 0;
738          patch_insn->bits3.if_else.pad0 = 0;
739       } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
740          patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
741          patch_insn->bits3.if_else.pop_count = 1;
742          patch_insn->bits3.if_else.pad0 = 0;
743       } else {
744          assert(0);
745       }
746
747       /* Also pop item off the stack in the endif instruction:
748        */
749       insn->bits3.if_else.jump_count = 0;
750       insn->bits3.if_else.pop_count = 1;
751       insn->bits3.if_else.pad0 = 0;
752    }
753 }
754
755 struct brw_instruction *brw_BREAK(struct brw_compile *p)
756 {
757    struct brw_instruction *insn;
758    insn = next_insn(p, BRW_OPCODE_BREAK);
759    brw_set_dest(insn, brw_ip_reg());
760    brw_set_src0(insn, brw_ip_reg());
761    brw_set_src1(insn, brw_imm_d(0x0));
762    insn->header.compression_control = BRW_COMPRESSION_NONE;
763    insn->header.execution_size = BRW_EXECUTE_8;
764    /* insn->header.mask_control = BRW_MASK_DISABLE; */
765    insn->bits3.if_else.pad0 = 0;
766    return insn;
767 }
768
769 struct brw_instruction *brw_CONT(struct brw_compile *p)
770 {
771    struct brw_instruction *insn;
772    insn = next_insn(p, BRW_OPCODE_CONTINUE);
773    brw_set_dest(insn, brw_ip_reg());
774    brw_set_src0(insn, brw_ip_reg());
775    brw_set_src1(insn, brw_imm_d(0x0));
776    insn->header.compression_control = BRW_COMPRESSION_NONE;
777    insn->header.execution_size = BRW_EXECUTE_8;
778    /* insn->header.mask_control = BRW_MASK_DISABLE; */
779    insn->bits3.if_else.pad0 = 0;
780    return insn;
781 }
782
783 /* DO/WHILE loop:
784  */
785 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
786 {
787    if (p->single_program_flow) {
788       return &p->store[p->nr_insn];
789    } else {
790       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
791
792       /* Override the defaults for this instruction:
793        */
794       brw_set_dest(insn, brw_null_reg());
795       brw_set_src0(insn, brw_null_reg());
796       brw_set_src1(insn, brw_null_reg());
797
798       insn->header.compression_control = BRW_COMPRESSION_NONE;
799       insn->header.execution_size = execute_size;
800       insn->header.predicate_control = BRW_PREDICATE_NONE;
801       /* insn->header.mask_control = BRW_MASK_ENABLE; */
802       /* insn->header.mask_control = BRW_MASK_DISABLE; */
803
804       return insn;
805    }
806 }
807
808
809
810 struct brw_instruction *brw_WHILE(struct brw_compile *p, 
811                                   struct brw_instruction *do_insn)
812 {
813    struct brw_instruction *insn;
814    GLuint br = 1;
815
816    if (p->brw->gen == 5)
817       br = 2;
818
819    if (p->single_program_flow)
820       insn = next_insn(p, BRW_OPCODE_ADD);
821    else
822       insn = next_insn(p, BRW_OPCODE_WHILE);
823
824    brw_set_dest(insn, brw_ip_reg());
825    brw_set_src0(insn, brw_ip_reg());
826    brw_set_src1(insn, brw_imm_d(0x0));
827
828    insn->header.compression_control = BRW_COMPRESSION_NONE;
829
830    if (p->single_program_flow) {
831       insn->header.execution_size = BRW_EXECUTE_1;
832
833       insn->bits3.d = (do_insn - insn) * 16;
834    } else {
835       insn->header.execution_size = do_insn->header.execution_size;
836
837       assert(do_insn->header.opcode == BRW_OPCODE_DO);
838       insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
839       insn->bits3.if_else.pop_count = 0;
840       insn->bits3.if_else.pad0 = 0;
841    }
842
843 /*    insn->header.mask_control = BRW_MASK_ENABLE; */
844
845    /* insn->header.mask_control = BRW_MASK_DISABLE; */
846    p->current->header.predicate_control = BRW_PREDICATE_NONE;   
847    return insn;
848 }
849
850
851 /* FORWARD JUMPS:
852  */
853 void brw_land_fwd_jump(struct brw_compile *p, 
854                        struct brw_instruction *jmp_insn)
855 {
856    struct brw_instruction *landing = &p->store[p->nr_insn];
857    GLuint jmpi = 1;
858
859    if (p->brw->gen == 5)
860        jmpi = 2;
861
862    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
863    assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
864
865    jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
866 }
867
868
869
870 /* To integrate with the above, it makes sense that the comparison
871  * instruction should populate the flag register.  It might be simpler
872  * just to use the flag reg for most WM tasks?
873  */
874 void brw_CMP(struct brw_compile *p,
875              struct brw_reg dest,
876              GLuint conditional,
877              struct brw_reg src0,
878              struct brw_reg src1)
879 {
880    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
881
882    insn->header.destreg__conditionalmod = conditional;
883    brw_set_dest(insn, dest);
884    brw_set_src0(insn, src0);
885    brw_set_src1(insn, src1);
886
887 /*    guess_execution_size(insn, src0); */
888
889
890    /* Make it so that future instructions will use the computed flag
891     * value until brw_set_predicate_control_flag_value() is called
892     * again.  
893     */
894    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
895        dest.nr == 0) {
896       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
897       p->flag_value = 0xff;
898    }
899 }
900
901
902
903 /***********************************************************************
904  * Helpers for the various SEND message types:
905  */
906
907 /** Extended math function, float[8].
908  */
909 void brw_math( struct brw_compile *p,
910                struct brw_reg dest,
911                GLuint function,
912                GLuint saturate,
913                GLuint msg_reg_nr,
914                struct brw_reg src,
915                GLuint data_type,
916                GLuint precision )
917 {
918    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
919    GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
920    GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
921
922    /* Example code doesn't set predicate_control for send
923     * instructions.
924     */
925    insn->header.predicate_control = 0; 
926    insn->header.destreg__conditionalmod = msg_reg_nr;
927
928    brw_set_dest(insn, dest);
929    brw_set_src0(insn, src);
930    brw_set_math_message(p->brw,
931                         insn, 
932                         msg_length, response_length, 
933                         function,
934                         BRW_MATH_INTEGER_UNSIGNED,
935                         precision,
936                         saturate,
937                         data_type);
938 }
939
940 /**
941  * Extended math function, float[16].
942  * Use 2 send instructions.
943  */
944 void brw_math_16( struct brw_compile *p,
945                   struct brw_reg dest,
946                   GLuint function,
947                   GLuint saturate,
948                   GLuint msg_reg_nr,
949                   struct brw_reg src,
950                   GLuint precision )
951 {
952    struct brw_instruction *insn;
953    GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
954    GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
955
956    /* First instruction:
957     */
958    brw_push_insn_state(p);
959    brw_set_predicate_control_flag_value(p, 0xff);
960    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
961
962    insn = next_insn(p, BRW_OPCODE_SEND);
963    insn->header.destreg__conditionalmod = msg_reg_nr;
964
965    brw_set_dest(insn, dest);
966    brw_set_src0(insn, src);
967    brw_set_math_message(p->brw,
968                         insn, 
969                         msg_length, response_length, 
970                         function,
971                         BRW_MATH_INTEGER_UNSIGNED,
972                         precision,
973                         saturate,
974                         BRW_MATH_DATA_VECTOR);
975
976    /* Second instruction:
977     */
978    insn = next_insn(p, BRW_OPCODE_SEND);
979    insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
980    insn->header.destreg__conditionalmod = msg_reg_nr+1;
981
982    brw_set_dest(insn, offset(dest,1));
983    brw_set_src0(insn, src);
984    brw_set_math_message(p->brw, 
985                         insn, 
986                         msg_length, response_length, 
987                         function,
988                         BRW_MATH_INTEGER_UNSIGNED,
989                         precision,
990                         saturate,
991                         BRW_MATH_DATA_VECTOR);
992
993    brw_pop_insn_state(p);
994 }
995
996
997 /**
998  * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
999  * Scratch offset should be a multiple of 64.
1000  * Used for register spilling.
1001  */
1002 void brw_dp_WRITE_16( struct brw_compile *p,
1003                       struct brw_reg src,
1004                       GLuint scratch_offset )
1005 {
1006    GLuint msg_reg_nr = 1;
1007    {
1008       brw_push_insn_state(p);
1009       brw_set_mask_control(p, BRW_MASK_DISABLE);
1010       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1011
1012       /* set message header global offset field (reg 0, element 2) */
1013       brw_MOV(p,
1014               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1015               brw_imm_d(scratch_offset));
1016
1017       brw_pop_insn_state(p);
1018    }
1019
1020    {
1021       GLuint msg_length = 3;
1022       struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1023       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1024    
1025       insn->header.predicate_control = 0; /* XXX */
1026       insn->header.compression_control = BRW_COMPRESSION_NONE; 
1027       insn->header.destreg__conditionalmod = msg_reg_nr;
1028   
1029       brw_set_dest(insn, dest);
1030       brw_set_src0(insn, src);
1031
1032       brw_set_dp_write_message(p->brw,
1033                                insn,
1034                                255, /* binding table index (255=stateless) */
1035                                BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1036                                BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1037                                msg_length,
1038                                0, /* pixel scoreboard */
1039                                0, /* response_length */
1040                                0); /* eot */
1041    }
1042 }
1043
1044
1045 /**
1046  * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1047  * Scratch offset should be a multiple of 64.
1048  * Used for register spilling.
1049  */
1050 void brw_dp_READ_16( struct brw_compile *p,
1051                       struct brw_reg dest,
1052                       GLuint scratch_offset )
1053 {
1054    GLuint msg_reg_nr = 1;
1055    {
1056       brw_push_insn_state(p);
1057       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1058       brw_set_mask_control(p, BRW_MASK_DISABLE);
1059
1060       /* set message header global offset field (reg 0, element 2) */
1061       brw_MOV(p,
1062               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1063               brw_imm_d(scratch_offset));
1064
1065       brw_pop_insn_state(p);
1066    }
1067
1068    {
1069       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1070    
1071       insn->header.predicate_control = 0; /* XXX */
1072       insn->header.compression_control = BRW_COMPRESSION_NONE; 
1073       insn->header.destreg__conditionalmod = msg_reg_nr;
1074   
1075       brw_set_dest(insn, dest); /* UW? */
1076       brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1077
1078       brw_set_dp_read_message(p->brw,
1079                               insn,
1080                               255, /* binding table index (255=stateless) */
1081                               3,  /* msg_control (3 means 4 Owords) */
1082                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1083                               1, /* target cache (render/scratch) */
1084                               1, /* msg_length */
1085                               2, /* response_length */
1086                               0); /* eot */
1087    }
1088 }
1089
1090
1091 /**
1092  * Read a float[4] vector from the data port Data Cache (const buffer).
1093  * Location (in buffer) should be a multiple of 16.
1094  * Used for fetching shader constants.
1095  * If relAddr is true, we'll do an indirect fetch using the address register.
1096  */
1097 void brw_dp_READ_4( struct brw_compile *p,
1098                     struct brw_reg dest,
1099                     GLboolean relAddr,
1100                     GLuint location,
1101                     GLuint bind_table_index )
1102 {
1103    /* XXX: relAddr not implemented */
1104    GLuint msg_reg_nr = 1;
1105    {
1106       struct brw_reg b;
1107       brw_push_insn_state(p);
1108       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1109       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1110       brw_set_mask_control(p, BRW_MASK_DISABLE);
1111
1112    /* Setup MRF[1] with location/offset into const buffer */
1113       b = brw_message_reg(msg_reg_nr);
1114       b = retype(b, BRW_REGISTER_TYPE_UD);
1115       /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1116        * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1117        */
1118       brw_MOV(p, b, brw_imm_ud(location));
1119       brw_pop_insn_state(p);
1120    }
1121
1122    {
1123       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1124    
1125       insn->header.predicate_control = BRW_PREDICATE_NONE;
1126       insn->header.compression_control = BRW_COMPRESSION_NONE; 
1127       insn->header.destreg__conditionalmod = msg_reg_nr;
1128       insn->header.mask_control = BRW_MASK_DISABLE;
1129   
1130       /* cast dest to a uword[8] vector */
1131       dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1132
1133       brw_set_dest(insn, dest);
1134       brw_set_src0(insn, brw_null_reg());
1135
1136       brw_set_dp_read_message(p->brw,
1137                               insn,
1138                               bind_table_index,
1139                               0,  /* msg_control (0 means 1 Oword) */
1140                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1141                               0, /* source cache = data cache */
1142                               1, /* msg_length */
1143                               1, /* response_length (1 Oword) */
1144                               0); /* eot */
1145    }
1146 }
1147
1148
1149 /**
1150  * Read float[4] constant(s) from VS constant buffer.
1151  * For relative addressing, two float[4] constants will be read into 'dest'.
1152  * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1153  */
1154 void brw_dp_READ_4_vs(struct brw_compile *p,
1155                       struct brw_reg dest,
1156                       GLuint oword,
1157                       GLboolean relAddr,
1158                       struct brw_reg addrReg,
1159                       GLuint location,
1160                       GLuint bind_table_index)
1161 {
1162    GLuint msg_reg_nr = 1;
1163
1164    assert(oword < 2);
1165    /*
1166    printf("vs const read msg, location %u, msg_reg_nr %d\n",
1167           location, msg_reg_nr);
1168    */
1169
1170    /* Setup MRF[1] with location/offset into const buffer */
1171    {
1172       struct brw_reg b;
1173
1174       brw_push_insn_state(p);
1175       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1176       brw_set_mask_control(p, BRW_MASK_DISABLE);
1177       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1178       /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1179
1180       /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1181        * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1182        */
1183       b = brw_message_reg(msg_reg_nr);
1184       b = retype(b, BRW_REGISTER_TYPE_UD);
1185       /*b = get_element_ud(b, 2);*/
1186       if (relAddr) {
1187          brw_ADD(p, b, addrReg, brw_imm_ud(location));
1188       }
1189       else {
1190          brw_MOV(p, b, brw_imm_ud(location));
1191       }
1192
1193       brw_pop_insn_state(p);
1194    }
1195
1196    {
1197       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1198    
1199       insn->header.predicate_control = BRW_PREDICATE_NONE;
1200       insn->header.compression_control = BRW_COMPRESSION_NONE; 
1201       insn->header.destreg__conditionalmod = msg_reg_nr;
1202       insn->header.mask_control = BRW_MASK_DISABLE;
1203       /*insn->header.access_mode = BRW_ALIGN_16;*/
1204   
1205       brw_set_dest(insn, dest);
1206       brw_set_src0(insn, brw_null_reg());
1207
1208       brw_set_dp_read_message(p->brw,
1209                               insn,
1210                               bind_table_index,
1211                               oword,  /* 0 = lower Oword, 1 = upper Oword */
1212                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1213                               0, /* source cache = data cache */
1214                               1, /* msg_length */
1215                               1, /* response_length (1 Oword) */
1216                               0); /* eot */
1217    }
1218 }
1219
1220
1221
1222 void brw_fb_WRITE(struct brw_compile *p,
1223                   struct brw_reg dest,
1224                   GLuint msg_reg_nr,
1225                   struct brw_reg src0,
1226                   GLuint binding_table_index,
1227                   GLuint msg_length,
1228                   GLuint response_length,
1229                   GLboolean eot)
1230 {
1231    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1232    
1233    insn->header.predicate_control = 0; /* XXX */
1234    insn->header.compression_control = BRW_COMPRESSION_NONE; 
1235    insn->header.destreg__conditionalmod = msg_reg_nr;
1236   
1237    brw_set_dest(insn, dest);
1238    brw_set_src0(insn, src0);
1239    brw_set_dp_write_message(p->brw,
1240                             insn,
1241                             binding_table_index,
1242                             BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1243                             BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1244                             msg_length,
1245                             1,  /* pixel scoreboard */
1246                             response_length, 
1247                             eot);
1248 }
1249
1250
1251 /**
1252  * Texture sample instruction.
1253  * Note: the msg_type plus msg_length values determine exactly what kind
1254  * of sampling operation is performed.  See volume 4, page 161 of docs.
1255  */
1256 void brw_SAMPLE(struct brw_compile *p,
1257                 struct brw_reg dest,
1258                 GLuint msg_reg_nr,
1259                 struct brw_reg src0,
1260                 GLuint binding_table_index,
1261                 GLuint sampler,
1262                 GLuint writemask,
1263                 GLuint msg_type,
1264                 GLuint response_length,
1265                 GLuint msg_length,
1266                 GLboolean eot,
1267                 GLuint header_present,
1268                 GLuint simd_mode)
1269 {
1270    GLboolean need_stall = 0;
1271    
1272    if (writemask == 0) {
1273       /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */
1274       return;
1275    }
1276    
1277    /* Hardware doesn't do destination dependency checking on send
1278     * instructions properly.  Add a workaround which generates the
1279     * dependency by other means.  In practice it seems like this bug
1280     * only crops up for texture samples, and only where registers are
1281     * written by the send and then written again later without being
1282     * read in between.  Luckily for us, we already track that
1283     * information and use it to modify the writemask for the
1284     * instruction, so that is a guide for whether a workaround is
1285     * needed.
1286     */
1287    if (writemask != BRW_WRITEMASK_XYZW) {
1288       GLuint dst_offset = 0;
1289       GLuint i, newmask = 0, len = 0;
1290
1291       for (i = 0; i < 4; i++) {
1292          if (writemask & (1<<i))
1293             break;
1294          dst_offset += 2;
1295       }
1296       for (; i < 4; i++) {
1297          if (!(writemask & (1<<i)))
1298             break;
1299          newmask |= 1<<i;
1300          len++;
1301       }
1302
1303       if (newmask != writemask) {
1304          need_stall = 1;
1305          /* debug_printf("need stall %x %x\n", newmask , writemask); */
1306       }
1307       else {
1308          struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1309          
1310          newmask = ~newmask & BRW_WRITEMASK_XYZW;
1311
1312          brw_push_insn_state(p);
1313
1314          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1315          brw_set_mask_control(p, BRW_MASK_DISABLE);
1316
1317          brw_MOV(p, m1, brw_vec8_grf(0,0));      
1318          brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
1319
1320          brw_pop_insn_state(p);
1321
1322          src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
1323          dest = offset(dest, dst_offset);
1324          response_length = len * 2;
1325       }
1326    }
1327
1328    {
1329       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1330    
1331       insn->header.predicate_control = 0; /* XXX */
1332       insn->header.compression_control = BRW_COMPRESSION_NONE;
1333       insn->header.destreg__conditionalmod = msg_reg_nr;
1334
1335       brw_set_dest(insn, dest);
1336       brw_set_src0(insn, src0);
1337       brw_set_sampler_message(p->brw, insn,
1338                               binding_table_index,
1339                               sampler,
1340                               msg_type,
1341                               response_length, 
1342                               msg_length,
1343                               eot,
1344                               header_present,
1345                               simd_mode);
1346    }
1347
1348    if (need_stall) {
1349       struct brw_reg reg = vec8(offset(dest, response_length-1));
1350
1351       /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1352        */
1353       brw_push_insn_state(p);
1354       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1355       brw_MOV(p, reg, reg);           
1356       brw_pop_insn_state(p);
1357    }
1358
1359 }
1360
1361 /* All these variables are pretty confusing - we might be better off
1362  * using bitmasks and macros for this, in the old style.  Or perhaps
1363  * just having the caller instantiate the fields in dword3 itself.
1364  */
1365 void brw_urb_WRITE(struct brw_compile *p,
1366                    struct brw_reg dest,
1367                    GLuint msg_reg_nr,
1368                    struct brw_reg src0,
1369                    GLboolean allocate,
1370                    GLboolean used,
1371                    GLuint msg_length,
1372                    GLuint response_length,
1373                    GLboolean eot,
1374                    GLboolean writes_complete,
1375                    GLuint offset,
1376                    GLuint swizzle)
1377 {
1378    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1379
1380    assert(msg_length < BRW_MAX_MRF);
1381
1382    brw_set_dest(insn, dest);
1383    brw_set_src0(insn, src0);
1384    brw_set_src1(insn, brw_imm_d(0));
1385
1386    insn->header.destreg__conditionalmod = msg_reg_nr;
1387
1388    brw_set_urb_message(p->brw,
1389                        insn,
1390                        allocate,
1391                        used,
1392                        msg_length,
1393                        response_length, 
1394                        eot, 
1395                        writes_complete, 
1396                        offset,
1397                        swizzle);
1398 }
1399
1400 void brw_ff_sync(struct brw_compile *p,
1401                    struct brw_reg dest,
1402                    GLuint msg_reg_nr,
1403                    struct brw_reg src0,
1404                    GLboolean allocate,
1405                    GLboolean used,
1406                    GLuint msg_length,
1407                    GLuint response_length,
1408                    GLboolean eot,
1409                    GLboolean writes_complete,
1410                    GLuint offset,
1411                    GLuint swizzle)
1412 {
1413    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1414
1415    assert(msg_length < 16);
1416
1417    brw_set_dest(insn, dest);
1418    brw_set_src0(insn, src0);
1419    brw_set_src1(insn, brw_imm_d(0));
1420
1421    insn->header.destreg__conditionalmod = msg_reg_nr;
1422
1423    brw_set_ff_sync_message(p->brw,
1424                        insn,
1425                        allocate,
1426                        used,
1427                        msg_length,
1428                        response_length, 
1429                        eot, 
1430                        writes_complete, 
1431                        offset,
1432                        swizzle);
1433 }