Upload Tizen2.0 source
[framework/graphics/cairo.git] / src / drm / cairo-drm-intel-brw-eu-emit.c
1 /*
2    Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3    Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4    develop this 3D driver.
5
6    Permission is hereby granted, free of charge, to any person obtaining
7    a copy of this software and associated documentation files (the
8    "Software"), to deal in the Software without restriction, including
9    without limitation the rights to use, copy, modify, merge, publish,
10    distribute, sublicense, and/or sell copies of the Software, and to
11    permit persons to whom the Software is furnished to do so, subject to
12    the following conditions:
13
14    The above copyright notice and this permission notice (including the
15    next paragraph) shall be included in all copies or substantial
16    portions of the Software.
17
18    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21    IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26  **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32 #include "cairoint.h"
33 #include "cairo-drm-intel-brw-eu.h"
34
35 #include <string.h>
36
37 /***********************************************************************
38  * Internal helper for constructing instructions
39  */
40
41 static void guess_execution_size( struct brw_instruction *insn,
42                                   struct brw_reg reg )
43 {
44     if (reg.width == BRW_WIDTH_8 &&
45         insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
46         insn->header.execution_size = BRW_EXECUTE_16;
47     else
48         insn->header.execution_size = reg.width;        /* note - definitions are compatible */
49 }
50
51
52 void
53 brw_instruction_set_destination (struct brw_instruction *insn,
54                                  struct brw_reg dest)
55 {
56     insn->bits1.da1.dest_reg_file = dest.file;
57     insn->bits1.da1.dest_reg_type = dest.type;
58     insn->bits1.da1.dest_address_mode = dest.address_mode;
59
60     if (dest.address_mode == BRW_ADDRESS_DIRECT) {
61         insn->bits1.da1.dest_reg_nr = dest.nr;
62
63         if (insn->header.access_mode == BRW_ALIGN_1) {
64             insn->bits1.da1.dest_subreg_nr = dest.subnr;
65             if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
66                 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
67             insn->bits1.da1.dest_horiz_stride = dest.hstride;
68         } else {
69             insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
70             insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
71         }
72     } else {
73         insn->bits1.ia1.dest_subreg_nr = dest.subnr;
74
75         /* These are different sizes in align1 vs align16:
76         */
77         if (insn->header.access_mode == BRW_ALIGN_1) {
78             insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
79             if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
80                 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
81             insn->bits1.ia1.dest_horiz_stride = dest.hstride;
82         } else {
83             insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
84         }
85     }
86
87     /* NEW: Set the execution size based on dest.width and
88      * insn->compression_control:
89      */
90     guess_execution_size(insn, dest);
91 }
92
93 void
94 brw_instruction_set_source0 (struct brw_instruction *insn,
95                              struct brw_reg reg)
96 {
97     assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99     insn->bits1.da1.src0_reg_file = reg.file;
100     insn->bits1.da1.src0_reg_type = reg.type;
101     insn->bits2.da1.src0_abs = reg.abs;
102     insn->bits2.da1.src0_negate = reg.negate;
103     insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105     if (reg.file == BRW_IMMEDIATE_VALUE) {
106         insn->bits3.ud = reg.dw1.ud;
107
108         /* Required to set some fields in src1 as well:
109         */
110         insn->bits1.da1.src1_reg_file = 0; /* arf */
111         insn->bits1.da1.src1_reg_type = reg.type;
112     } else {
113         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
114             if (insn->header.access_mode == BRW_ALIGN_1) {
115                 insn->bits2.da1.src0_subreg_nr = reg.subnr;
116                 insn->bits2.da1.src0_reg_nr = reg.nr;
117             } else {
118                 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
119                 insn->bits2.da16.src0_reg_nr = reg.nr;
120             }
121         } else {
122             insn->bits2.ia1.src0_subreg_nr = reg.subnr;
123
124             if (insn->header.access_mode == BRW_ALIGN_1) {
125                 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
126             } else {
127                 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
128             }
129         }
130
131         if (insn->header.access_mode == BRW_ALIGN_1) {
132             if (reg.width == BRW_WIDTH_1 &&
133                 insn->header.execution_size == BRW_EXECUTE_1) {
134                 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
135                 insn->bits2.da1.src0_width = BRW_WIDTH_1;
136                 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
137             } else {
138                 insn->bits2.da1.src0_horiz_stride = reg.hstride;
139                 insn->bits2.da1.src0_width = reg.width;
140                 insn->bits2.da1.src0_vert_stride = reg.vstride;
141             }
142         } else {
143             insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
144             insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
145             insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
146             insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
147
148             /* This is an oddity of the fact we're using the same
149              * descriptions for registers in align_16 as align_1:
150              */
151             if (reg.vstride == BRW_VERTICAL_STRIDE_8)
152                 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
153             else
154                 insn->bits2.da16.src0_vert_stride = reg.vstride;
155         }
156     }
157 }
158
159
160 void brw_set_src1( struct brw_instruction *insn,
161                    struct brw_reg reg )
162 {
163     assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
164
165     insn->bits1.da1.src1_reg_file = reg.file;
166     insn->bits1.da1.src1_reg_type = reg.type;
167     insn->bits3.da1.src1_abs = reg.abs;
168     insn->bits3.da1.src1_negate = reg.negate;
169
170     /* Only src1 can be immediate in two-argument instructions.
171     */
172     assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
173
174     if (reg.file == BRW_IMMEDIATE_VALUE) {
175         insn->bits3.ud = reg.dw1.ud;
176     }
177     else {
178         /* This is a hardware restriction, which may or may not be lifted
179          * in the future:
180          */
181         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
182         //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
183
184         if (insn->header.access_mode == BRW_ALIGN_1) {
185             insn->bits3.da1.src1_subreg_nr = reg.subnr;
186             insn->bits3.da1.src1_reg_nr = reg.nr;
187         }
188         else {
189             insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
190             insn->bits3.da16.src1_reg_nr = reg.nr;
191         }
192
193         if (insn->header.access_mode == BRW_ALIGN_1) {
194             if (reg.width == BRW_WIDTH_1 &&
195                 insn->header.execution_size == BRW_EXECUTE_1) {
196                 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
197                 insn->bits3.da1.src1_width = BRW_WIDTH_1;
198                 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
199             }
200             else {
201                 insn->bits3.da1.src1_horiz_stride = reg.hstride;
202                 insn->bits3.da1.src1_width = reg.width;
203                 insn->bits3.da1.src1_vert_stride = reg.vstride;
204             }
205         }
206         else {
207             insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
208             insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
209             insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
210             insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
211
212             /* This is an oddity of the fact we're using the same
213              * descriptions for registers in align_16 as align_1:
214              */
215             if (reg.vstride == BRW_VERTICAL_STRIDE_8)
216                 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
217             else
218                 insn->bits3.da16.src1_vert_stride = reg.vstride;
219         }
220     }
221 }
222
223
224
225 static void brw_set_math_message( struct brw_instruction *insn,
226                                   uint32_t msg_length,
227                                   uint32_t response_length,
228                                   uint32_t function,
229                                   uint32_t integer_type,
230                                   int low_precision,
231                                   int saturate,
232                                   uint32_t dataType )
233 {
234     brw_set_src1 (insn, brw_imm_d (0));
235
236     insn->bits3.math.function = function;
237     insn->bits3.math.int_type = integer_type;
238     insn->bits3.math.precision = low_precision;
239     insn->bits3.math.saturate = saturate;
240     insn->bits3.math.data_type = dataType;
241     insn->bits3.math.response_length = response_length;
242     insn->bits3.math.msg_length = msg_length;
243     insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
244     insn->bits3.math.end_of_thread = 0;
245 }
246
247 static void brw_set_urb_message( struct brw_instruction *insn,
248                                  int allocate,
249                                  int used,
250                                  uint32_t msg_length,
251                                  uint32_t response_length,
252                                  int end_of_thread,
253                                  int complete,
254                                  uint32_t offset,
255                                  uint32_t swizzle_control )
256 {
257     brw_set_src1 (insn, brw_imm_d (0));
258
259     insn->bits3.urb.opcode = 0; /* ? */
260     insn->bits3.urb.offset = offset;
261     insn->bits3.urb.swizzle_control = swizzle_control;
262     insn->bits3.urb.allocate = allocate;
263     insn->bits3.urb.used = used;        /* ? */
264     insn->bits3.urb.complete = complete;
265     insn->bits3.urb.response_length = response_length;
266     insn->bits3.urb.msg_length = msg_length;
267     insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
268     insn->bits3.urb.end_of_thread = end_of_thread;
269 }
270
271 void
272 brw_instruction_set_dp_write_message (struct brw_instruction *insn,
273                                       uint32_t binding_table_index,
274                                       uint32_t msg_control,
275                                       uint32_t msg_type,
276                                       uint32_t msg_length,
277                                       uint32_t pixel_scoreboard_clear,
278                                       uint32_t response_length,
279                                       uint32_t end_of_thread)
280 {
281     brw_set_src1 (insn, brw_imm_d (0));
282
283     insn->bits3.dp_write.binding_table_index = binding_table_index;
284     insn->bits3.dp_write.msg_control = msg_control;
285     insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
286     insn->bits3.dp_write.msg_type = msg_type;
287     insn->bits3.dp_write.send_commit_msg = 0;
288     insn->bits3.dp_write.response_length = response_length;
289     insn->bits3.dp_write.msg_length = msg_length;
290     insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
291     insn->bits3.urb.end_of_thread = end_of_thread;
292 }
293
294 static void brw_set_dp_read_message( struct brw_instruction *insn,
295                                      uint32_t binding_table_index,
296                                      uint32_t msg_control,
297                                      uint32_t msg_type,
298                                      uint32_t target_cache,
299                                      uint32_t msg_length,
300                                      uint32_t response_length,
301                                      uint32_t end_of_thread )
302 {
303     brw_set_src1 (insn, brw_imm_d (0));
304
305     insn->bits3.dp_read.binding_table_index = binding_table_index;
306     insn->bits3.dp_read.msg_control = msg_control;
307     insn->bits3.dp_read.msg_type = msg_type;
308     insn->bits3.dp_read.target_cache = target_cache;
309     insn->bits3.dp_read.response_length = response_length;
310     insn->bits3.dp_read.msg_length = msg_length;
311     insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
312     insn->bits3.dp_read.end_of_thread = end_of_thread;
313 }
314
315 static void
316 brw_set_sampler_message (struct brw_instruction *insn,
317                          cairo_bool_t is_g4x,
318                          uint32_t binding_table_index,
319                          uint32_t sampler,
320                          uint32_t msg_type,
321                          uint32_t response_length,
322                          uint32_t msg_length,
323                          cairo_bool_t eot)
324 {
325     brw_set_src1 (insn, brw_imm_d (0));
326
327     if (is_g4x) {
328         /* XXX presume the driver is sane! */
329         insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
330         insn->bits3.sampler_g4x.sampler = sampler;
331         insn->bits3.sampler_g4x.msg_type = msg_type;
332         insn->bits3.sampler_g4x.response_length = response_length;
333         insn->bits3.sampler_g4x.msg_length = msg_length;
334         insn->bits3.sampler_g4x.end_of_thread = eot;
335         insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
336     } else {
337         insn->bits3.sampler.binding_table_index = binding_table_index;
338         insn->bits3.sampler.sampler = sampler;
339         insn->bits3.sampler.msg_type = msg_type;
340         insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
341         insn->bits3.sampler.response_length = response_length;
342         insn->bits3.sampler.msg_length = msg_length;
343         insn->bits3.sampler.end_of_thread = eot;
344         insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
345     }
346 }
347
348 struct brw_instruction *
349 brw_next_instruction (struct brw_compile *p,
350                       uint32_t opcode)
351 {
352     struct brw_instruction *insn;
353
354     assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
355
356     insn = &p->store[p->nr_insn++];
357     memcpy(insn, p->current, sizeof(*insn));
358
359     /* Reset this one-shot flag: */
360     if (p->current->header.destreg__conditonalmod) {
361         p->current->header.destreg__conditonalmod = 0;
362         p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
363     }
364
365     insn->header.opcode = opcode;
366     return insn;
367 }
368
369 static struct brw_instruction *brw_alu1( struct brw_compile *p,
370                                          uint32_t opcode,
371                                          struct brw_reg dest,
372                                          struct brw_reg src )
373 {
374     struct brw_instruction *insn = brw_next_instruction(p, opcode);
375     brw_instruction_set_destination(insn, dest);
376     brw_instruction_set_source0(insn, src);
377     return insn;
378 }
379
380 static struct brw_instruction *brw_alu2(struct brw_compile *p,
381                                         uint32_t opcode,
382                                         struct brw_reg dest,
383                                         struct brw_reg src0,
384                                         struct brw_reg src1 )
385 {
386     struct brw_instruction *insn = brw_next_instruction(p, opcode);
387     brw_instruction_set_destination(insn, dest);
388     brw_instruction_set_source0(insn, src0);
389     brw_set_src1(insn, src1);
390     return insn;
391 }
392
393
394 /***********************************************************************
395  * Convenience routines.
396  */
397 #define ALU1(OP)                                        \
398     struct brw_instruction *brw_##OP(struct brw_compile *p,                     \
399                                      struct brw_reg dest,                       \
400                                      struct brw_reg src0)                       \
401 {                                                       \
402     return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    \
403 }
404
405 #define ALU2(OP)                                        \
406     struct brw_instruction *brw_##OP(struct brw_compile *p,                     \
407                                      struct brw_reg dest,                       \
408                                      struct brw_reg src0,                       \
409                                      struct brw_reg src1)                       \
410 {                                                       \
411     return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);      \
412 }
413
414
415     ALU1(MOV)
416     ALU2(SEL)
417     ALU1(NOT)
418     ALU2(AND)
419     ALU2(OR)
420     ALU2(XOR)
421     ALU2(SHR)
422     ALU2(SHL)
423     ALU2(RSR)
424     ALU2(RSL)
425     ALU2(ASR)
426     ALU2(ADD)
427     ALU2(MUL)
428     ALU1(FRC)
429     ALU1(RNDD)
430     ALU1(RNDZ)
431     ALU2(MAC)
432     ALU2(MACH)
433     ALU1(LZD)
434     ALU2(DP4)
435     ALU2(DPH)
436     ALU2(DP3)
437     ALU2(DP2)
438 ALU2(LINE)
439
440
441
442
443 void brw_NOP(struct brw_compile *p)
444 {
445     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_NOP);
446     brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
447     brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
448     brw_set_src1(insn, brw_imm_ud(0x0));
449 }
450
451
452
453
454
455 /***********************************************************************
456  * Comparisons, if/else/endif
457  */
458
459 struct brw_instruction *brw_JMPI(struct brw_compile *p,
460                                  struct brw_reg dest,
461                                  struct brw_reg src0,
462                                  struct brw_reg src1)
463 {
464     struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
465
466     p->current->header.predicate_control = BRW_PREDICATE_NONE;
467
468     return insn;
469 }
470
471 /* EU takes the value from the flag register and pushes it onto some
472  * sort of a stack (presumably merging with any flag value already on
473  * the stack).  Within an if block, the flags at the top of the stack
474  * control execution on each channel of the unit, eg. on each of the
475  * 16 pixel values in our wm programs.
476  *
477  * When the matching 'else' instruction is reached (presumably by
478  * countdown of the instruction count patched in by our ELSE/ENDIF
479  * functions), the relevant flags are inverted.
480  *
481  * When the matching 'endif' instruction is reached, the flags are
482  * popped off.  If the stack is now empty, normal execution resumes.
483  *
484  * No attempt is made to deal with stack overflow (14 elements?).
485  */
486 struct brw_instruction *brw_IF(struct brw_compile *p, uint32_t execute_size)
487 {
488     struct brw_instruction *insn;
489
490     if (p->single_program_flow) {
491         assert(execute_size == BRW_EXECUTE_1);
492
493         insn = brw_next_instruction(p, BRW_OPCODE_ADD);
494         insn->header.predicate_inverse = 1;
495     } else {
496         insn = brw_next_instruction(p, BRW_OPCODE_IF);
497     }
498
499     /* Override the defaults for this instruction:
500     */
501     brw_instruction_set_destination (insn, brw_ip_reg ());
502     brw_instruction_set_source0 (insn, brw_ip_reg ());
503     brw_set_src1 (insn, brw_imm_d (0));
504
505     insn->header.execution_size = execute_size;
506     insn->header.compression_control = BRW_COMPRESSION_NONE;
507     insn->header.predicate_control = BRW_PREDICATE_NORMAL;
508     insn->header.mask_control = BRW_MASK_ENABLE;
509     if (!p->single_program_flow)
510         insn->header.thread_control = BRW_THREAD_SWITCH;
511
512     p->current->header.predicate_control = BRW_PREDICATE_NONE;
513
514     return insn;
515 }
516
517
518 struct brw_instruction *brw_ELSE(struct brw_compile *p,
519                                  struct brw_instruction *if_insn)
520 {
521     struct brw_instruction *insn;
522
523     if (p->single_program_flow) {
524         insn = brw_next_instruction(p, BRW_OPCODE_ADD);
525     } else {
526         insn = brw_next_instruction(p, BRW_OPCODE_ELSE);
527     }
528
529     brw_instruction_set_destination (insn, brw_ip_reg ());
530     brw_instruction_set_source0 (insn, brw_ip_reg ());
531     brw_set_src1 (insn, brw_imm_d (0));
532
533     insn->header.compression_control = BRW_COMPRESSION_NONE;
534     insn->header.execution_size = if_insn->header.execution_size;
535     insn->header.mask_control = BRW_MASK_ENABLE;
536     if (!p->single_program_flow)
537         insn->header.thread_control = BRW_THREAD_SWITCH;
538
539     /* Patch the if instruction to point at this instruction.
540     */
541     if (p->single_program_flow) {
542         assert(if_insn->header.opcode == BRW_OPCODE_ADD);
543
544         if_insn->bits3.ud = (insn - if_insn + 1) * 16;
545     } else {
546         assert(if_insn->header.opcode == BRW_OPCODE_IF);
547
548         if_insn->bits3.if_else.jump_count = insn - if_insn;
549         if_insn->bits3.if_else.pop_count = 1;
550         if_insn->bits3.if_else.pad0 = 0;
551     }
552
553     return insn;
554 }
555
556 void brw_ENDIF(struct brw_compile *p,
557                struct brw_instruction *patch_insn)
558 {
559     if (p->single_program_flow) {
560         /* In single program flow mode, there's no need to execute an ENDIF,
561          * since we don't need to do any stack operations, and if we're executing
562          * currently, we want to just continue executing.
563          */
564         struct brw_instruction *next = &p->store[p->nr_insn];
565
566         assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
567
568         patch_insn->bits3.ud = (next - patch_insn) * 16;
569     } else {
570         struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_ENDIF);
571
572         brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
573         brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
574         brw_set_src1 (insn, brw_imm_d (0));
575
576         insn->header.compression_control = BRW_COMPRESSION_NONE;
577         insn->header.execution_size = patch_insn->header.execution_size;
578         insn->header.mask_control = BRW_MASK_ENABLE;
579         insn->header.thread_control = BRW_THREAD_SWITCH;
580
581         assert(patch_insn->bits3.if_else.jump_count == 0);
582
583         /* Patch the if or else instructions to point at this or the next
584          * instruction respectively.
585          */
586         if (patch_insn->header.opcode == BRW_OPCODE_IF) {
587             /* Automagically turn it into an IFF:
588             */
589             patch_insn->header.opcode = BRW_OPCODE_IFF;
590             patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
591             patch_insn->bits3.if_else.pop_count = 0;
592             patch_insn->bits3.if_else.pad0 = 0;
593         } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
594             patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
595             patch_insn->bits3.if_else.pop_count = 1;
596             patch_insn->bits3.if_else.pad0 = 0;
597         } else {
598             assert(0);
599         }
600
601         /* Also pop item off the stack in the endif instruction:
602         */
603         insn->bits3.if_else.jump_count = 0;
604         insn->bits3.if_else.pop_count = 1;
605         insn->bits3.if_else.pad0 = 0;
606     }
607 }
608
609 struct brw_instruction *brw_BREAK(struct brw_compile *p)
610 {
611     struct brw_instruction *insn;
612     insn = brw_next_instruction(p, BRW_OPCODE_BREAK);
613     brw_instruction_set_destination(insn, brw_ip_reg());
614     brw_instruction_set_source0(insn, brw_ip_reg());
615     brw_set_src1(insn, brw_imm_d (0));
616     insn->header.compression_control = BRW_COMPRESSION_NONE;
617     insn->header.execution_size = BRW_EXECUTE_8;
618     /* insn->header.mask_control = BRW_MASK_DISABLE; */
619     insn->bits3.if_else.pad0 = 0;
620     return insn;
621 }
622
623 struct brw_instruction *brw_CONT(struct brw_compile *p)
624 {
625     struct brw_instruction *insn;
626     insn = brw_next_instruction(p, BRW_OPCODE_CONTINUE);
627     brw_instruction_set_destination(insn, brw_ip_reg());
628     brw_instruction_set_source0(insn, brw_ip_reg());
629     brw_set_src1 (insn, brw_imm_d (0));
630     insn->header.compression_control = BRW_COMPRESSION_NONE;
631     insn->header.execution_size = BRW_EXECUTE_8;
632     /* insn->header.mask_control = BRW_MASK_DISABLE; */
633     insn->bits3.if_else.pad0 = 0;
634     return insn;
635 }
636
637 /* DO/WHILE loop:
638 */
639 struct brw_instruction *brw_DO(struct brw_compile *p, uint32_t execute_size)
640 {
641     if (p->single_program_flow) {
642         return &p->store[p->nr_insn];
643     } else {
644         struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_DO);
645
646         /* Override the defaults for this instruction:
647         */
648         brw_instruction_set_destination(insn, brw_null_reg());
649         brw_instruction_set_source0(insn, brw_null_reg());
650         brw_set_src1(insn, brw_null_reg());
651
652         insn->header.compression_control = BRW_COMPRESSION_NONE;
653         insn->header.execution_size = execute_size;
654         insn->header.predicate_control = BRW_PREDICATE_NONE;
655         /* insn->header.mask_control = BRW_MASK_ENABLE; */
656         /* insn->header.mask_control = BRW_MASK_DISABLE; */
657
658         return insn;
659     }
660 }
661
662
663
664 struct brw_instruction *brw_WHILE(struct brw_compile *p,
665                                   struct brw_instruction *do_insn)
666 {
667     struct brw_instruction *insn;
668
669     if (p->single_program_flow)
670         insn = brw_next_instruction(p, BRW_OPCODE_ADD);
671     else
672         insn = brw_next_instruction(p, BRW_OPCODE_WHILE);
673
674     brw_instruction_set_destination(insn, brw_ip_reg());
675     brw_instruction_set_source0(insn, brw_ip_reg());
676     brw_set_src1 (insn, brw_imm_d (0));
677
678     insn->header.compression_control = BRW_COMPRESSION_NONE;
679
680     if (p->single_program_flow) {
681         insn->header.execution_size = BRW_EXECUTE_1;
682
683         insn->bits3.d = (do_insn - insn) * 16;
684     } else {
685         insn->header.execution_size = do_insn->header.execution_size;
686
687         assert(do_insn->header.opcode == BRW_OPCODE_DO);
688         insn->bits3.if_else.jump_count = do_insn - insn + 1;
689         insn->bits3.if_else.pop_count = 0;
690         insn->bits3.if_else.pad0 = 0;
691     }
692
693     /*    insn->header.mask_control = BRW_MASK_ENABLE; */
694
695     /* insn->header.mask_control = BRW_MASK_DISABLE; */
696     p->current->header.predicate_control = BRW_PREDICATE_NONE;
697     return insn;
698 }
699
700
701 /* FORWARD JUMPS:
702 */
703 void brw_land_fwd_jump(struct brw_compile *p,
704                        struct brw_instruction *jmp_insn)
705 {
706     struct brw_instruction *landing = &p->store[p->nr_insn];
707
708     assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
709     assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
710
711     jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
712 }
713
714
715
716 /* To integrate with the above, it makes sense that the comparison
717  * instruction should populate the flag register.  It might be simpler
718  * just to use the flag reg for most WM tasks?
719  */
720 void brw_CMP(struct brw_compile *p,
721              struct brw_reg dest,
722              uint32_t conditional,
723              struct brw_reg src0,
724              struct brw_reg src1)
725 {
726     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_CMP);
727
728     insn->header.destreg__conditonalmod = conditional;
729     brw_instruction_set_destination(insn, dest);
730     brw_instruction_set_source0(insn, src0);
731     brw_set_src1(insn, src1);
732
733     /*    guess_execution_size(insn, src0); */
734
735
736     /* Make it so that future instructions will use the computed flag
737      * value until brw_set_predicate_control_flag_value() is called
738      * again.
739      */
740     if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
741         dest.nr == 0) {
742         p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
743         p->flag_value = 0xff;
744     }
745 }
746
747
748
749 /***********************************************************************
750  * Helpers for the various SEND message types:
751  */
752
753 /* Invert 8 values
754 */
755 void brw_math( struct brw_compile *p,
756                struct brw_reg dest,
757                uint32_t function,
758                uint32_t saturate,
759                uint32_t msg_reg_nr,
760                struct brw_reg src,
761                uint32_t data_type,
762                uint32_t precision )
763 {
764     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
765     uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
766     uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
767
768     /* Example code doesn't set predicate_control for send
769      * instructions.
770      */
771     insn->header.predicate_control = 0;
772     insn->header.destreg__conditonalmod = msg_reg_nr;
773
774     response_length = 1;
775
776     brw_instruction_set_destination(insn, dest);
777     brw_instruction_set_source0(insn, src);
778     brw_set_math_message(insn,
779                          msg_length, response_length,
780                          function,
781                          BRW_MATH_INTEGER_UNSIGNED,
782                          precision,
783                          saturate,
784                          data_type);
785 }
786
787 /* Use 2 send instructions to invert 16 elements
788 */
789 void brw_math_16( struct brw_compile *p,
790                   struct brw_reg dest,
791                   uint32_t function,
792                   uint32_t saturate,
793                   uint32_t msg_reg_nr,
794                   struct brw_reg src,
795                   uint32_t precision )
796 {
797     struct brw_instruction *insn;
798     uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
799     uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
800
801     /* First instruction:
802     */
803     brw_push_insn_state(p);
804     brw_set_predicate_control_flag_value(p, 0xff);
805     brw_set_compression_control(p, BRW_COMPRESSION_NONE);
806
807     insn = brw_next_instruction(p, BRW_OPCODE_SEND);
808     insn->header.destreg__conditonalmod = msg_reg_nr;
809
810     brw_instruction_set_destination(insn, dest);
811     brw_instruction_set_source0(insn, src);
812     brw_set_math_message(insn,
813                          msg_length, response_length,
814                          function,
815                          BRW_MATH_INTEGER_UNSIGNED,
816                          precision,
817                          saturate,
818                          BRW_MATH_DATA_VECTOR);
819
820     /* Second instruction:
821     */
822     insn = brw_next_instruction(p, BRW_OPCODE_SEND);
823     insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
824     insn->header.destreg__conditonalmod = msg_reg_nr+1;
825
826     brw_instruction_set_destination(insn, offset(dest,1));
827     brw_instruction_set_source0(insn, src);
828     brw_set_math_message(insn,
829                          msg_length, response_length,
830                          function,
831                          BRW_MATH_INTEGER_UNSIGNED,
832                          precision,
833                          saturate,
834                          BRW_MATH_DATA_VECTOR);
835
836     brw_pop_insn_state(p);
837 }
838
839
840
841
842 void brw_dp_WRITE_16( struct brw_compile *p,
843                       struct brw_reg src,
844                       uint32_t msg_reg_nr,
845                       uint32_t scratch_offset )
846 {
847     {
848         brw_push_insn_state(p);
849         brw_set_mask_control(p, BRW_MASK_DISABLE);
850         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
851
852         brw_MOV (p,
853                 retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
854                 brw_imm_d (scratch_offset));
855
856         brw_pop_insn_state(p);
857     }
858
859     {
860         uint32_t msg_length = 3;
861         struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
862         struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
863
864         insn->header.predicate_control = 0; /* XXX */
865         insn->header.compression_control = BRW_COMPRESSION_NONE;
866         insn->header.destreg__conditonalmod = msg_reg_nr;
867
868         brw_instruction_set_destination(insn, dest);
869         brw_instruction_set_source0(insn, src);
870
871         brw_instruction_set_dp_write_message(insn,
872                                              255, /* bti */
873                                              BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
874                                              BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
875                                              msg_length,
876                                              0, /* pixel scoreboard */
877                                              0, /* response_length */
878                                              0); /* eot */
879     }
880
881 }
882
883
884 void brw_dp_READ_16( struct brw_compile *p,
885                      struct brw_reg dest,
886                      uint32_t msg_reg_nr,
887                      uint32_t scratch_offset )
888 {
889     {
890         brw_push_insn_state(p);
891         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
892         brw_set_mask_control(p, BRW_MASK_DISABLE);
893
894         brw_MOV (p,
895                 retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
896                 brw_imm_d (scratch_offset));
897
898         brw_pop_insn_state(p);
899     }
900
901     {
902         struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
903
904         insn->header.predicate_control = 0; /* XXX */
905         insn->header.compression_control = BRW_COMPRESSION_NONE;
906         insn->header.destreg__conditonalmod = msg_reg_nr;
907
908         brw_instruction_set_destination(insn, dest);    /* UW? */
909         brw_instruction_set_source0(insn, retype(brw_vec8_grf(0), BRW_REGISTER_TYPE_UW));
910
911         brw_set_dp_read_message(insn,
912                                 255, /* bti */
913                                 3,  /* msg_control */
914                                 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
915                                 1, /* target cache */
916                                 1, /* msg_length */
917                                 2, /* response_length */
918                                 0); /* eot */
919     }
920 }
921
922
923 void brw_fb_WRITE(struct brw_compile *p,
924                   struct brw_reg dest,
925                   uint32_t msg_reg_nr,
926                   struct brw_reg src0,
927                   uint32_t binding_table_index,
928                   uint32_t msg_length,
929                   uint32_t response_length,
930                   int eot)
931 {
932     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
933
934     insn->header.predicate_control = 0; /* XXX */
935     insn->header.compression_control = BRW_COMPRESSION_NONE;
936     insn->header.destreg__conditonalmod = msg_reg_nr;
937
938     brw_instruction_set_destination(insn, dest);
939     brw_instruction_set_source0(insn, src0);
940     brw_instruction_set_dp_write_message(insn,
941                                          binding_table_index,
942                                          BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
943                                          BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
944                                          msg_length,
945                                          1,     /* pixel scoreboard */
946                                          response_length,
947                                          eot);
948 }
949
950
951
952 void brw_SAMPLE (struct brw_compile *p,
953                  struct brw_reg dest,
954                  uint32_t msg_reg_nr,
955                  struct brw_reg src0,
956                  uint32_t binding_table_index,
957                  uint32_t sampler,
958                  uint32_t writemask,
959                  uint32_t msg_type,
960                  uint32_t response_length,
961                  uint32_t msg_length,
962                  cairo_bool_t eot)
963 {
964     int need_stall = 0;
965
966     if(writemask == 0) {
967         /*       printf("%s: zero writemask??\n", __FUNCTION__); */
968         return;
969     }
970
971     /* Hardware doesn't do destination dependency checking on send
972      * instructions properly.  Add a workaround which generates the
973      * dependency by other means.  In practice it seems like this bug
974      * only crops up for texture samples, and only where registers are
975      * written by the send and then written again later without being
976      * read in between.  Luckily for us, we already track that
977      * information and use it to modify the writemask for the
978      * instruction, so that is a guide for whether a workaround is
979      * needed.
980      */
981     if (writemask != WRITEMASK_XYZW) {
982         uint32_t dst_offset = 0;
983         uint32_t i, newmask = 0, len = 0;
984
985         for (i = 0; i < 4; i++) {
986             if (writemask & (1<<i))
987                 break;
988             dst_offset += 2;
989         }
990         for (; i < 4; i++) {
991             if (!(writemask & (1<<i)))
992                 break;
993             newmask |= 1<<i;
994             len++;
995         }
996
997         if (newmask != writemask) {
998             need_stall = 1;
999             /*   printf("need stall %x %x\n", newmask , writemask); */
1000         }
1001         else {
1002             struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1003
1004             newmask = ~newmask & WRITEMASK_XYZW;
1005
1006             brw_push_insn_state(p);
1007
1008             brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1009             brw_set_mask_control(p, BRW_MASK_DISABLE);
1010
1011             brw_MOV(p, m1, brw_vec8_grf(0));
1012             brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1013
1014             brw_pop_insn_state(p);
1015
1016             src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1017             dest = offset(dest, dst_offset);
1018             response_length = len * 2;
1019         }
1020     }
1021
1022     {
1023         struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
1024
1025         insn->header.predicate_control = 0; /* XXX */
1026         insn->header.compression_control = BRW_COMPRESSION_NONE;
1027         insn->header.destreg__conditonalmod = msg_reg_nr;
1028
1029         brw_instruction_set_destination(insn, dest);
1030         brw_instruction_set_source0(insn, src0);
1031         brw_set_sampler_message (insn, p->is_g4x,
1032                                  binding_table_index,
1033                                  sampler,
1034                                  msg_type,
1035                                  response_length,
1036                                  msg_length,
1037                                  eot);
1038     }
1039
1040     if (need_stall)
1041     {
1042         struct brw_reg reg = vec8(offset(dest, response_length-1));
1043
1044         /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1045         */
1046         brw_push_insn_state(p);
1047         brw_set_compression_control(p, 0);
1048         brw_MOV(p, reg, reg);
1049         brw_pop_insn_state(p);
1050     }
1051 }
1052
1053 /* All these variables are pretty confusing - we might be better off
1054  * using bitmasks and macros for this, in the old style.  Or perhaps
1055  * just having the caller instantiate the fields in dword3 itself.
1056  */
1057 void brw_urb_WRITE(struct brw_compile *p,
1058                    struct brw_reg dest,
1059                    uint32_t msg_reg_nr,
1060                    struct brw_reg src0,
1061                    int allocate,
1062                    int used,
1063                    uint32_t msg_length,
1064                    uint32_t response_length,
1065                    int eot,
1066                    int writes_complete,
1067                    uint32_t offset,
1068                    uint32_t swizzle)
1069 {
1070     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
1071
1072     assert(msg_length < 16);
1073
1074     brw_instruction_set_destination (insn, dest);
1075     brw_instruction_set_source0 (insn, src0);
1076     brw_set_src1 (insn, brw_imm_d (0));
1077
1078     insn->header.destreg__conditonalmod = msg_reg_nr;
1079
1080     brw_set_urb_message (insn,
1081                          allocate,
1082                          used,
1083                          msg_length,
1084                          response_length,
1085                          eot,
1086                          writes_complete,
1087                          offset,
1088                          swizzle);
1089 }