2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "cairo-drm-intel-brw-eu.h"
37 /***********************************************************************
38 * Internal helper for constructing instructions
41 static void guess_execution_size( struct brw_instruction *insn,
44 if (reg.width == BRW_WIDTH_8 &&
45 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
46 insn->header.execution_size = BRW_EXECUTE_16;
48 insn->header.execution_size = reg.width; /* note - definitions are compatible */
53 brw_instruction_set_destination (struct brw_instruction *insn,
56 insn->bits1.da1.dest_reg_file = dest.file;
57 insn->bits1.da1.dest_reg_type = dest.type;
58 insn->bits1.da1.dest_address_mode = dest.address_mode;
60 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
61 insn->bits1.da1.dest_reg_nr = dest.nr;
63 if (insn->header.access_mode == BRW_ALIGN_1) {
64 insn->bits1.da1.dest_subreg_nr = dest.subnr;
65 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
66 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
67 insn->bits1.da1.dest_horiz_stride = dest.hstride;
69 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
70 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
73 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
75 /* These are different sizes in align1 vs align16:
77 if (insn->header.access_mode == BRW_ALIGN_1) {
78 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
79 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
80 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
81 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
83 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87 /* NEW: Set the execution size based on dest.width and
88 * insn->compression_control:
90 guess_execution_size(insn, dest);
94 brw_instruction_set_source0 (struct brw_instruction *insn,
97 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
99 insn->bits1.da1.src0_reg_file = reg.file;
100 insn->bits1.da1.src0_reg_type = reg.type;
101 insn->bits2.da1.src0_abs = reg.abs;
102 insn->bits2.da1.src0_negate = reg.negate;
103 insn->bits2.da1.src0_address_mode = reg.address_mode;
105 if (reg.file == BRW_IMMEDIATE_VALUE) {
106 insn->bits3.ud = reg.dw1.ud;
108 /* Required to set some fields in src1 as well:
110 insn->bits1.da1.src1_reg_file = 0; /* arf */
111 insn->bits1.da1.src1_reg_type = reg.type;
113 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
114 if (insn->header.access_mode == BRW_ALIGN_1) {
115 insn->bits2.da1.src0_subreg_nr = reg.subnr;
116 insn->bits2.da1.src0_reg_nr = reg.nr;
118 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
119 insn->bits2.da16.src0_reg_nr = reg.nr;
122 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
124 if (insn->header.access_mode == BRW_ALIGN_1) {
125 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
127 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
131 if (insn->header.access_mode == BRW_ALIGN_1) {
132 if (reg.width == BRW_WIDTH_1 &&
133 insn->header.execution_size == BRW_EXECUTE_1) {
134 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
135 insn->bits2.da1.src0_width = BRW_WIDTH_1;
136 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
138 insn->bits2.da1.src0_horiz_stride = reg.hstride;
139 insn->bits2.da1.src0_width = reg.width;
140 insn->bits2.da1.src0_vert_stride = reg.vstride;
143 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
144 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
145 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
146 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
148 /* This is an oddity of the fact we're using the same
149 * descriptions for registers in align_16 as align_1:
151 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
152 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
154 insn->bits2.da16.src0_vert_stride = reg.vstride;
160 void brw_set_src1( struct brw_instruction *insn,
163 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
165 insn->bits1.da1.src1_reg_file = reg.file;
166 insn->bits1.da1.src1_reg_type = reg.type;
167 insn->bits3.da1.src1_abs = reg.abs;
168 insn->bits3.da1.src1_negate = reg.negate;
170 /* Only src1 can be immediate in two-argument instructions.
172 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
174 if (reg.file == BRW_IMMEDIATE_VALUE) {
175 insn->bits3.ud = reg.dw1.ud;
178 /* This is a hardware restriction, which may or may not be lifted
181 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
182 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
184 if (insn->header.access_mode == BRW_ALIGN_1) {
185 insn->bits3.da1.src1_subreg_nr = reg.subnr;
186 insn->bits3.da1.src1_reg_nr = reg.nr;
189 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
190 insn->bits3.da16.src1_reg_nr = reg.nr;
193 if (insn->header.access_mode == BRW_ALIGN_1) {
194 if (reg.width == BRW_WIDTH_1 &&
195 insn->header.execution_size == BRW_EXECUTE_1) {
196 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
197 insn->bits3.da1.src1_width = BRW_WIDTH_1;
198 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
201 insn->bits3.da1.src1_horiz_stride = reg.hstride;
202 insn->bits3.da1.src1_width = reg.width;
203 insn->bits3.da1.src1_vert_stride = reg.vstride;
207 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
208 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
209 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
210 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
212 /* This is an oddity of the fact we're using the same
213 * descriptions for registers in align_16 as align_1:
215 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
216 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
218 insn->bits3.da16.src1_vert_stride = reg.vstride;
225 static void brw_set_math_message( struct brw_instruction *insn,
227 uint32_t response_length,
229 uint32_t integer_type,
234 brw_set_src1 (insn, brw_imm_d (0));
236 insn->bits3.math.function = function;
237 insn->bits3.math.int_type = integer_type;
238 insn->bits3.math.precision = low_precision;
239 insn->bits3.math.saturate = saturate;
240 insn->bits3.math.data_type = dataType;
241 insn->bits3.math.response_length = response_length;
242 insn->bits3.math.msg_length = msg_length;
243 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
244 insn->bits3.math.end_of_thread = 0;
247 static void brw_set_urb_message( struct brw_instruction *insn,
251 uint32_t response_length,
255 uint32_t swizzle_control )
257 brw_set_src1 (insn, brw_imm_d (0));
259 insn->bits3.urb.opcode = 0; /* ? */
260 insn->bits3.urb.offset = offset;
261 insn->bits3.urb.swizzle_control = swizzle_control;
262 insn->bits3.urb.allocate = allocate;
263 insn->bits3.urb.used = used; /* ? */
264 insn->bits3.urb.complete = complete;
265 insn->bits3.urb.response_length = response_length;
266 insn->bits3.urb.msg_length = msg_length;
267 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
268 insn->bits3.urb.end_of_thread = end_of_thread;
272 brw_instruction_set_dp_write_message (struct brw_instruction *insn,
273 uint32_t binding_table_index,
274 uint32_t msg_control,
277 uint32_t pixel_scoreboard_clear,
278 uint32_t response_length,
279 uint32_t end_of_thread)
281 brw_set_src1 (insn, brw_imm_d (0));
283 insn->bits3.dp_write.binding_table_index = binding_table_index;
284 insn->bits3.dp_write.msg_control = msg_control;
285 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
286 insn->bits3.dp_write.msg_type = msg_type;
287 insn->bits3.dp_write.send_commit_msg = 0;
288 insn->bits3.dp_write.response_length = response_length;
289 insn->bits3.dp_write.msg_length = msg_length;
290 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
291 insn->bits3.urb.end_of_thread = end_of_thread;
294 static void brw_set_dp_read_message( struct brw_instruction *insn,
295 uint32_t binding_table_index,
296 uint32_t msg_control,
298 uint32_t target_cache,
300 uint32_t response_length,
301 uint32_t end_of_thread )
303 brw_set_src1 (insn, brw_imm_d (0));
305 insn->bits3.dp_read.binding_table_index = binding_table_index;
306 insn->bits3.dp_read.msg_control = msg_control;
307 insn->bits3.dp_read.msg_type = msg_type;
308 insn->bits3.dp_read.target_cache = target_cache;
309 insn->bits3.dp_read.response_length = response_length;
310 insn->bits3.dp_read.msg_length = msg_length;
311 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
312 insn->bits3.dp_read.end_of_thread = end_of_thread;
316 brw_set_sampler_message (struct brw_instruction *insn,
318 uint32_t binding_table_index,
321 uint32_t response_length,
325 brw_set_src1 (insn, brw_imm_d (0));
328 /* XXX presume the driver is sane! */
329 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
330 insn->bits3.sampler_g4x.sampler = sampler;
331 insn->bits3.sampler_g4x.msg_type = msg_type;
332 insn->bits3.sampler_g4x.response_length = response_length;
333 insn->bits3.sampler_g4x.msg_length = msg_length;
334 insn->bits3.sampler_g4x.end_of_thread = eot;
335 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
337 insn->bits3.sampler.binding_table_index = binding_table_index;
338 insn->bits3.sampler.sampler = sampler;
339 insn->bits3.sampler.msg_type = msg_type;
340 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
341 insn->bits3.sampler.response_length = response_length;
342 insn->bits3.sampler.msg_length = msg_length;
343 insn->bits3.sampler.end_of_thread = eot;
344 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
348 struct brw_instruction *
349 brw_next_instruction (struct brw_compile *p,
352 struct brw_instruction *insn;
354 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
356 insn = &p->store[p->nr_insn++];
357 memcpy(insn, p->current, sizeof(*insn));
359 /* Reset this one-shot flag: */
360 if (p->current->header.destreg__conditonalmod) {
361 p->current->header.destreg__conditonalmod = 0;
362 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
365 insn->header.opcode = opcode;
369 static struct brw_instruction *brw_alu1( struct brw_compile *p,
374 struct brw_instruction *insn = brw_next_instruction(p, opcode);
375 brw_instruction_set_destination(insn, dest);
376 brw_instruction_set_source0(insn, src);
380 static struct brw_instruction *brw_alu2(struct brw_compile *p,
384 struct brw_reg src1 )
386 struct brw_instruction *insn = brw_next_instruction(p, opcode);
387 brw_instruction_set_destination(insn, dest);
388 brw_instruction_set_source0(insn, src0);
389 brw_set_src1(insn, src1);
394 /***********************************************************************
395 * Convenience routines.
398 struct brw_instruction *brw_##OP(struct brw_compile *p, \
399 struct brw_reg dest, \
400 struct brw_reg src0) \
402 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
406 struct brw_instruction *brw_##OP(struct brw_compile *p, \
407 struct brw_reg dest, \
408 struct brw_reg src0, \
409 struct brw_reg src1) \
411 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
443 void brw_NOP(struct brw_compile *p)
445 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_NOP);
446 brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
447 brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
448 brw_set_src1(insn, brw_imm_ud(0x0));
455 /***********************************************************************
456 * Comparisons, if/else/endif
459 struct brw_instruction *brw_JMPI(struct brw_compile *p,
464 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
466 p->current->header.predicate_control = BRW_PREDICATE_NONE;
471 /* EU takes the value from the flag register and pushes it onto some
472 * sort of a stack (presumably merging with any flag value already on
473 * the stack). Within an if block, the flags at the top of the stack
474 * control execution on each channel of the unit, eg. on each of the
475 * 16 pixel values in our wm programs.
477 * When the matching 'else' instruction is reached (presumably by
478 * countdown of the instruction count patched in by our ELSE/ENDIF
479 * functions), the relevant flags are inverted.
481 * When the matching 'endif' instruction is reached, the flags are
482 * popped off. If the stack is now empty, normal execution resumes.
484 * No attempt is made to deal with stack overflow (14 elements?).
486 struct brw_instruction *brw_IF(struct brw_compile *p, uint32_t execute_size)
488 struct brw_instruction *insn;
490 if (p->single_program_flow) {
491 assert(execute_size == BRW_EXECUTE_1);
493 insn = brw_next_instruction(p, BRW_OPCODE_ADD);
494 insn->header.predicate_inverse = 1;
496 insn = brw_next_instruction(p, BRW_OPCODE_IF);
499 /* Override the defaults for this instruction:
501 brw_instruction_set_destination (insn, brw_ip_reg ());
502 brw_instruction_set_source0 (insn, brw_ip_reg ());
503 brw_set_src1 (insn, brw_imm_d (0));
505 insn->header.execution_size = execute_size;
506 insn->header.compression_control = BRW_COMPRESSION_NONE;
507 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
508 insn->header.mask_control = BRW_MASK_ENABLE;
509 if (!p->single_program_flow)
510 insn->header.thread_control = BRW_THREAD_SWITCH;
512 p->current->header.predicate_control = BRW_PREDICATE_NONE;
518 struct brw_instruction *brw_ELSE(struct brw_compile *p,
519 struct brw_instruction *if_insn)
521 struct brw_instruction *insn;
523 if (p->single_program_flow) {
524 insn = brw_next_instruction(p, BRW_OPCODE_ADD);
526 insn = brw_next_instruction(p, BRW_OPCODE_ELSE);
529 brw_instruction_set_destination (insn, brw_ip_reg ());
530 brw_instruction_set_source0 (insn, brw_ip_reg ());
531 brw_set_src1 (insn, brw_imm_d (0));
533 insn->header.compression_control = BRW_COMPRESSION_NONE;
534 insn->header.execution_size = if_insn->header.execution_size;
535 insn->header.mask_control = BRW_MASK_ENABLE;
536 if (!p->single_program_flow)
537 insn->header.thread_control = BRW_THREAD_SWITCH;
539 /* Patch the if instruction to point at this instruction.
541 if (p->single_program_flow) {
542 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
544 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
546 assert(if_insn->header.opcode == BRW_OPCODE_IF);
548 if_insn->bits3.if_else.jump_count = insn - if_insn;
549 if_insn->bits3.if_else.pop_count = 1;
550 if_insn->bits3.if_else.pad0 = 0;
556 void brw_ENDIF(struct brw_compile *p,
557 struct brw_instruction *patch_insn)
559 if (p->single_program_flow) {
560 /* In single program flow mode, there's no need to execute an ENDIF,
561 * since we don't need to do any stack operations, and if we're executing
562 * currently, we want to just continue executing.
564 struct brw_instruction *next = &p->store[p->nr_insn];
566 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
568 patch_insn->bits3.ud = (next - patch_insn) * 16;
570 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_ENDIF);
572 brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
573 brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
574 brw_set_src1 (insn, brw_imm_d (0));
576 insn->header.compression_control = BRW_COMPRESSION_NONE;
577 insn->header.execution_size = patch_insn->header.execution_size;
578 insn->header.mask_control = BRW_MASK_ENABLE;
579 insn->header.thread_control = BRW_THREAD_SWITCH;
581 assert(patch_insn->bits3.if_else.jump_count == 0);
583 /* Patch the if or else instructions to point at this or the next
584 * instruction respectively.
586 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
587 /* Automagically turn it into an IFF:
589 patch_insn->header.opcode = BRW_OPCODE_IFF;
590 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
591 patch_insn->bits3.if_else.pop_count = 0;
592 patch_insn->bits3.if_else.pad0 = 0;
593 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
594 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
595 patch_insn->bits3.if_else.pop_count = 1;
596 patch_insn->bits3.if_else.pad0 = 0;
601 /* Also pop item off the stack in the endif instruction:
603 insn->bits3.if_else.jump_count = 0;
604 insn->bits3.if_else.pop_count = 1;
605 insn->bits3.if_else.pad0 = 0;
609 struct brw_instruction *brw_BREAK(struct brw_compile *p)
611 struct brw_instruction *insn;
612 insn = brw_next_instruction(p, BRW_OPCODE_BREAK);
613 brw_instruction_set_destination(insn, brw_ip_reg());
614 brw_instruction_set_source0(insn, brw_ip_reg());
615 brw_set_src1(insn, brw_imm_d (0));
616 insn->header.compression_control = BRW_COMPRESSION_NONE;
617 insn->header.execution_size = BRW_EXECUTE_8;
618 /* insn->header.mask_control = BRW_MASK_DISABLE; */
619 insn->bits3.if_else.pad0 = 0;
623 struct brw_instruction *brw_CONT(struct brw_compile *p)
625 struct brw_instruction *insn;
626 insn = brw_next_instruction(p, BRW_OPCODE_CONTINUE);
627 brw_instruction_set_destination(insn, brw_ip_reg());
628 brw_instruction_set_source0(insn, brw_ip_reg());
629 brw_set_src1 (insn, brw_imm_d (0));
630 insn->header.compression_control = BRW_COMPRESSION_NONE;
631 insn->header.execution_size = BRW_EXECUTE_8;
632 /* insn->header.mask_control = BRW_MASK_DISABLE; */
633 insn->bits3.if_else.pad0 = 0;
639 struct brw_instruction *brw_DO(struct brw_compile *p, uint32_t execute_size)
641 if (p->single_program_flow) {
642 return &p->store[p->nr_insn];
644 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_DO);
646 /* Override the defaults for this instruction:
648 brw_instruction_set_destination(insn, brw_null_reg());
649 brw_instruction_set_source0(insn, brw_null_reg());
650 brw_set_src1(insn, brw_null_reg());
652 insn->header.compression_control = BRW_COMPRESSION_NONE;
653 insn->header.execution_size = execute_size;
654 insn->header.predicate_control = BRW_PREDICATE_NONE;
655 /* insn->header.mask_control = BRW_MASK_ENABLE; */
656 /* insn->header.mask_control = BRW_MASK_DISABLE; */
664 struct brw_instruction *brw_WHILE(struct brw_compile *p,
665 struct brw_instruction *do_insn)
667 struct brw_instruction *insn;
669 if (p->single_program_flow)
670 insn = brw_next_instruction(p, BRW_OPCODE_ADD);
672 insn = brw_next_instruction(p, BRW_OPCODE_WHILE);
674 brw_instruction_set_destination(insn, brw_ip_reg());
675 brw_instruction_set_source0(insn, brw_ip_reg());
676 brw_set_src1 (insn, brw_imm_d (0));
678 insn->header.compression_control = BRW_COMPRESSION_NONE;
680 if (p->single_program_flow) {
681 insn->header.execution_size = BRW_EXECUTE_1;
683 insn->bits3.d = (do_insn - insn) * 16;
685 insn->header.execution_size = do_insn->header.execution_size;
687 assert(do_insn->header.opcode == BRW_OPCODE_DO);
688 insn->bits3.if_else.jump_count = do_insn - insn + 1;
689 insn->bits3.if_else.pop_count = 0;
690 insn->bits3.if_else.pad0 = 0;
693 /* insn->header.mask_control = BRW_MASK_ENABLE; */
695 /* insn->header.mask_control = BRW_MASK_DISABLE; */
696 p->current->header.predicate_control = BRW_PREDICATE_NONE;
703 void brw_land_fwd_jump(struct brw_compile *p,
704 struct brw_instruction *jmp_insn)
706 struct brw_instruction *landing = &p->store[p->nr_insn];
708 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
709 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
711 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
716 /* To integrate with the above, it makes sense that the comparison
717 * instruction should populate the flag register. It might be simpler
718 * just to use the flag reg for most WM tasks?
720 void brw_CMP(struct brw_compile *p,
722 uint32_t conditional,
726 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_CMP);
728 insn->header.destreg__conditonalmod = conditional;
729 brw_instruction_set_destination(insn, dest);
730 brw_instruction_set_source0(insn, src0);
731 brw_set_src1(insn, src1);
733 /* guess_execution_size(insn, src0); */
736 /* Make it so that future instructions will use the computed flag
737 * value until brw_set_predicate_control_flag_value() is called
740 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
742 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
743 p->flag_value = 0xff;
749 /***********************************************************************
750 * Helpers for the various SEND message types:
755 void brw_math( struct brw_compile *p,
764 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
765 uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
766 uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
768 /* Example code doesn't set predicate_control for send
771 insn->header.predicate_control = 0;
772 insn->header.destreg__conditonalmod = msg_reg_nr;
776 brw_instruction_set_destination(insn, dest);
777 brw_instruction_set_source0(insn, src);
778 brw_set_math_message(insn,
779 msg_length, response_length,
781 BRW_MATH_INTEGER_UNSIGNED,
787 /* Use 2 send instructions to invert 16 elements
789 void brw_math_16( struct brw_compile *p,
797 struct brw_instruction *insn;
798 uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
799 uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
801 /* First instruction:
803 brw_push_insn_state(p);
804 brw_set_predicate_control_flag_value(p, 0xff);
805 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
807 insn = brw_next_instruction(p, BRW_OPCODE_SEND);
808 insn->header.destreg__conditonalmod = msg_reg_nr;
810 brw_instruction_set_destination(insn, dest);
811 brw_instruction_set_source0(insn, src);
812 brw_set_math_message(insn,
813 msg_length, response_length,
815 BRW_MATH_INTEGER_UNSIGNED,
818 BRW_MATH_DATA_VECTOR);
820 /* Second instruction:
822 insn = brw_next_instruction(p, BRW_OPCODE_SEND);
823 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
824 insn->header.destreg__conditonalmod = msg_reg_nr+1;
826 brw_instruction_set_destination(insn, offset(dest,1));
827 brw_instruction_set_source0(insn, src);
828 brw_set_math_message(insn,
829 msg_length, response_length,
831 BRW_MATH_INTEGER_UNSIGNED,
834 BRW_MATH_DATA_VECTOR);
836 brw_pop_insn_state(p);
842 void brw_dp_WRITE_16( struct brw_compile *p,
845 uint32_t scratch_offset )
848 brw_push_insn_state(p);
849 brw_set_mask_control(p, BRW_MASK_DISABLE);
850 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
853 retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
854 brw_imm_d (scratch_offset));
856 brw_pop_insn_state(p);
860 uint32_t msg_length = 3;
861 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
862 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
864 insn->header.predicate_control = 0; /* XXX */
865 insn->header.compression_control = BRW_COMPRESSION_NONE;
866 insn->header.destreg__conditonalmod = msg_reg_nr;
868 brw_instruction_set_destination(insn, dest);
869 brw_instruction_set_source0(insn, src);
871 brw_instruction_set_dp_write_message(insn,
873 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
874 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
876 0, /* pixel scoreboard */
877 0, /* response_length */
884 void brw_dp_READ_16( struct brw_compile *p,
887 uint32_t scratch_offset )
890 brw_push_insn_state(p);
891 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
892 brw_set_mask_control(p, BRW_MASK_DISABLE);
895 retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
896 brw_imm_d (scratch_offset));
898 brw_pop_insn_state(p);
902 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
904 insn->header.predicate_control = 0; /* XXX */
905 insn->header.compression_control = BRW_COMPRESSION_NONE;
906 insn->header.destreg__conditonalmod = msg_reg_nr;
908 brw_instruction_set_destination(insn, dest); /* UW? */
909 brw_instruction_set_source0(insn, retype(brw_vec8_grf(0), BRW_REGISTER_TYPE_UW));
911 brw_set_dp_read_message(insn,
914 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
915 1, /* target cache */
917 2, /* response_length */
923 void brw_fb_WRITE(struct brw_compile *p,
927 uint32_t binding_table_index,
929 uint32_t response_length,
932 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
934 insn->header.predicate_control = 0; /* XXX */
935 insn->header.compression_control = BRW_COMPRESSION_NONE;
936 insn->header.destreg__conditonalmod = msg_reg_nr;
938 brw_instruction_set_destination(insn, dest);
939 brw_instruction_set_source0(insn, src0);
940 brw_instruction_set_dp_write_message(insn,
942 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
943 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
945 1, /* pixel scoreboard */
952 void brw_SAMPLE (struct brw_compile *p,
956 uint32_t binding_table_index,
960 uint32_t response_length,
967 /* printf("%s: zero writemask??\n", __FUNCTION__); */
971 /* Hardware doesn't do destination dependency checking on send
972 * instructions properly. Add a workaround which generates the
973 * dependency by other means. In practice it seems like this bug
974 * only crops up for texture samples, and only where registers are
975 * written by the send and then written again later without being
976 * read in between. Luckily for us, we already track that
977 * information and use it to modify the writemask for the
978 * instruction, so that is a guide for whether a workaround is
981 if (writemask != WRITEMASK_XYZW) {
982 uint32_t dst_offset = 0;
983 uint32_t i, newmask = 0, len = 0;
985 for (i = 0; i < 4; i++) {
986 if (writemask & (1<<i))
991 if (!(writemask & (1<<i)))
997 if (newmask != writemask) {
999 /* printf("need stall %x %x\n", newmask , writemask); */
1002 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1004 newmask = ~newmask & WRITEMASK_XYZW;
1006 brw_push_insn_state(p);
1008 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1009 brw_set_mask_control(p, BRW_MASK_DISABLE);
1011 brw_MOV(p, m1, brw_vec8_grf(0));
1012 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1014 brw_pop_insn_state(p);
1016 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1017 dest = offset(dest, dst_offset);
1018 response_length = len * 2;
1023 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
1025 insn->header.predicate_control = 0; /* XXX */
1026 insn->header.compression_control = BRW_COMPRESSION_NONE;
1027 insn->header.destreg__conditonalmod = msg_reg_nr;
1029 brw_instruction_set_destination(insn, dest);
1030 brw_instruction_set_source0(insn, src0);
1031 brw_set_sampler_message (insn, p->is_g4x,
1032 binding_table_index,
1042 struct brw_reg reg = vec8(offset(dest, response_length-1));
1044 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1046 brw_push_insn_state(p);
1047 brw_set_compression_control(p, 0);
1048 brw_MOV(p, reg, reg);
1049 brw_pop_insn_state(p);
1053 /* All these variables are pretty confusing - we might be better off
1054 * using bitmasks and macros for this, in the old style. Or perhaps
1055 * just having the caller instantiate the fields in dword3 itself.
1057 void brw_urb_WRITE(struct brw_compile *p,
1058 struct brw_reg dest,
1059 uint32_t msg_reg_nr,
1060 struct brw_reg src0,
1063 uint32_t msg_length,
1064 uint32_t response_length,
1066 int writes_complete,
1070 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
1072 assert(msg_length < 16);
1074 brw_instruction_set_destination (insn, dest);
1075 brw_instruction_set_source0 (insn, src0);
1076 brw_set_src1 (insn, brw_imm_d (0));
1078 insn->header.destreg__conditonalmod = msg_reg_nr;
1080 brw_set_urb_message (insn,