1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
31 * TGSI to LLVM IR translation -- SoA.
33 * @author Jose Fonseca <jfonseca@vmware.com>
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71 #include "lp_bld_jit_types.h"
73 #define DUMP_GS_EMITS 0
76 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
80 * - take execution masks in consideration
81 * - debug control-flow instructions
83 #define DEBUG_EXECUTION 0
87  * Emit code to print a register value.
/* Emits runtime LLVM printf-style output of a single register channel,
 * labelled "<file>[<index>].<chan> = " — used by the DEBUG_EXECUTION path.
 * NOTE(review): parts of this definition (signature tail, locals) are elided
 * in this listing. */
90 emit_dump_reg(struct gallivm_state *gallivm,
/* "xyzw"[chan] maps the channel index 0..3 to its letter. */
98    snprintf(buf, sizeof buf, "   %s[%u].%c = ",
100             index, "xyzw"[chan]);
102    lp_build_print_value(gallivm, buf, value);
105 static inline struct function_ctx *
/* Return the innermost (current) function context from the exec mask's
 * function stack.  Asserts the stack is non-empty and within bounds. */
106 func_ctx(struct lp_exec_mask *mask)
108    assert(mask->function_stack_size > 0);
109    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
110    return &mask->function_stack[mask->function_stack_size - 1];
114  * combine the execution mask if there is one with the current mask.
/* Returns the effective per-lane execution mask vector: the control-flow
 * exec mask ANDed with the outer lp_build_mask (bld->mask), when present. */
117 mask_vec(struct lp_build_tgsi_context *bld_base)
119    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
120    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
121    struct lp_exec_mask *exec_mask = &bld->exec_mask;
/* bld->mask may be NULL (e.g. no outer fragment mask); guard before use. */
122    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
/* No active control-flow mask: result depends only on the outer mask
 * (the elided lines presumably return bld_mask there — confirm). */
123    if (!exec_mask->has_mask) {
127       return exec_mask->exec_mask;
128    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
129                        exec_mask->exec_mask, "");
/* Handle a TGSI BRK instruction.  Peeks at the *next* instruction: a break
 * immediately followed by ENDSWITCH or CASE applies to all channels
 * unconditionally (break_always), otherwise only active channels break. */
132 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
133                                struct lp_build_tgsi_context * bld_base)
135    enum tgsi_opcode opcode =
136       bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
137    bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
138                         opcode == TGSI_OPCODE_CASE);
139    lp_exec_break(mask, &bld_base->pc, break_always);
/* Begin a TGSI SWITCH: push the current switch state onto the per-function
 * switch stack and start a fresh one with an all-zero (nothing matched yet)
 * switch mask for the new switch value. */
142 static void lp_exec_switch(struct lp_exec_mask *mask,
143                            LLVMValueRef switchval)
145    struct function_ctx *ctx = func_ctx(mask);
/* Nesting overflow: still bump the stack counter so the matching
 * ENDSWITCH can unwind symmetrically (it decrements in its overflow path). */
147    if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
148        ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
149       ctx->switch_stack_size++;
/* Remember what BRK meant before this switch (loop vs. switch break). */
153    ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
155    ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
/* Save the enclosing switch's full state. */
157    ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
158    ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
159    ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
160    ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
161    ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
162    ctx->switch_stack_size++;
/* New switch starts with no lanes active until a CASE matches. */
164    mask->switch_mask = LLVMConstNull(mask->int_vec_type);
165    ctx->switch_val = switchval;
166    ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
167    ctx->switch_in_default = false;
170    lp_exec_mask_update(mask);
/* End a TGSI SWITCH.  If a DEFAULT was recorded earlier but not yet executed
 * (deferred because it was not the last statement), jump back and run it now
 * with the proper mask; otherwise pop the saved switch state. */
173 static void lp_exec_endswitch(struct lp_exec_mask *mask,
174                               struct lp_build_tgsi_context * bld_base)
176    LLVMBuilderRef builder = mask->bld->gallivm->builder;
177    struct function_ctx *ctx = func_ctx(mask);
/* Unwind the counter bumped by the nesting-overflow path in lp_exec_switch. */
179    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
180       ctx->switch_stack_size--;
184    /* check if there's deferred default if so do it now */
185    if (ctx->switch_pc && !ctx->switch_in_default) {
186       LLVMValueRef prevmask, defaultmask;
/* default mask = enclosing mask AND NOT(union of all case-matched lanes). */
188       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
189       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
190       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
191       ctx->switch_in_default = true;
193       lp_exec_mask_update(mask);
195       assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
196              TGSI_OPCODE_DEFAULT);
/* Rewind the pc to just after DEFAULT and replay the skipped code. */
198       tmp_pc = bld_base->pc;
199       bld_base->pc = ctx->switch_pc;
201        * re-purpose switch_pc to point to here again, since we stop execution of
202        * the deferred default after next break.
204       ctx->switch_pc = tmp_pc - 1;
/* Second arrival: the deferred default has just finished executing. */
209    else if (ctx->switch_pc && ctx->switch_in_default) {
210       assert(bld_base->pc == ctx->switch_pc + 1);
/* Restore the enclosing switch's state saved by lp_exec_switch. */
213    ctx->switch_stack_size--;
214    mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
215    ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
216    ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
217    ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
218    ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
220    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
222    lp_exec_mask_update(mask);
/* Handle a TGSI CASE: enable the lanes whose switch value equals caseval
 * (in addition to lanes already active via fallthrough), and accumulate the
 * match into switch_mask_default so DEFAULT knows which lanes matched. */
225 static void lp_exec_case(struct lp_exec_mask *mask,
226                          LLVMValueRef caseval)
228    LLVMBuilderRef builder = mask->bld->gallivm->builder;
229    struct function_ctx *ctx = func_ctx(mask);
231    LLVMValueRef casemask, prevmask;
/* Nesting overflow: switch handling was skipped; nothing to evaluate. */
233    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
237    /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
238    if (!ctx->switch_in_default) {
239       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
240       casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
/* Record matched lanes so DEFAULT can later exclude them. */
241       ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
242                                              ctx->switch_mask_default, "sw_default_mask");
/* OR with current mask to honor fallthrough from the previous case. */
243       casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
244       mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
246    lp_exec_mask_update(mask);
251  * Analyse default statement in a switch.
252  * \return true if default is last statement, false otherwise
253  * \param default_pc_start contains pc of instruction to jump to
254  *                         if default wasn't last but there's no
255  *                         fallthrough into default.
/* Scans forward from the current pc to decide whether the DEFAULT is the
 * last (non-CASE) statement of this switch, tracking nesting so inner
 * switches don't confuse the analysis. */
257 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
258                                        struct lp_build_tgsi_context * bld_base,
259                                        int *default_pc_start)
261    unsigned pc = bld_base->pc;
262    struct function_ctx *ctx = func_ctx(mask);
263    int curr_switch_stack = ctx->switch_stack_size;
265    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
269    /* skip over case statements which are together with default */
270    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
/* Walk the remaining instruction stream (~0u guards uninitialized pc). */
274    while (pc != ~0u && pc < bld_base->num_instructions) {
275       enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
/* A CASE at the same nesting level means default is not last; record
 * where execution should resume. */
277       case TGSI_OPCODE_CASE:
278          if (curr_switch_stack == ctx->switch_stack_size) {
279             *default_pc_start = pc - 1;
/* Track nested switches so their CASE/ENDSWITCH are ignored. */
283       case TGSI_OPCODE_SWITCH:
286       case TGSI_OPCODE_ENDSWITCH:
287          if (curr_switch_stack == ctx->switch_stack_size) {
288             *default_pc_start = pc - 1;
298    /* should never arrive here */
/* Handle a TGSI DEFAULT statement.  If default is the last statement of the
 * switch, just activate all not-yet-matched lanes.  Otherwise defer it:
 * record the pc and let lp_exec_endswitch replay it with the right mask. */
303 static void lp_exec_default(struct lp_exec_mask *mask,
304                             struct lp_build_tgsi_context * bld_base)
306    LLVMBuilderRef builder = mask->bld->gallivm->builder;
307    struct function_ctx *ctx = func_ctx(mask);
309    int default_exec_pc = 0;
310    boolean default_is_last;
312    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
317     * This is a messy opcode, because it may not be always at the end and
318     * there can be fallthrough in and out of it.
321    default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
323     * If it is last statement in switch (note that case statements appearing
324     * "at the same time" as default don't change that) everything is just fine,
325     * update switch mask and go on. This means we can handle default with
326     * fallthrough INTO it without overhead, if it is last.
328    if (default_is_last) {
329       LLVMValueRef prevmask, defaultmask;
/* Activate every lane of the enclosing mask that no CASE matched,
 * keeping lanes already active via fallthrough. */
330       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
331       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
332       defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
333       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
334       ctx->switch_in_default = true;
336       lp_exec_mask_update(mask);
340        * Technically, "case" immediately before default isn't really a
341        * fallthrough, however we still have to count them as such as we
342        * already have updated the masks.
343        * If that happens in practice could add a switch optimizer pass
344        * which just gets rid of all case statements appearing together with
345        * default (or could do switch analysis at switch start time instead).
/* Detect fallthrough into default: the previous opcode was neither a
 * break nor the switch itself. */
347       enum tgsi_opcode opcode =
348          bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
349       boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
350                          opcode != TGSI_OPCODE_SWITCH);
352        * If it is not last statement and there was no fallthrough into it,
353        * we record the PC and continue execution at next case (again, those
354        * case encountered at the same time don't count). At endswitch
355        * time, we update switchmask, and go back executing the code we skipped
356        * until the next break (possibly re-executing some code with changed mask
357        * if there was a fallthrough out of default).
358        * Finally, if it is not last statement and there was a fallthrough into it,
359        * do the same as with the former case, except instead of skipping the code
360        * just execute it without updating the mask, then go back and re-execute.
362       ctx->switch_pc = bld_base->pc;
364          bld_base->pc = default_exec_pc;
/* Handle a TGSI CAL: push a new function frame recording the return pc and
 * the current return mask, then (in elided code) jump to the callee. */
370 static void lp_exec_mask_call(struct lp_exec_mask *mask,
/* Guard against exceeding the maximum call depth. */
374    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
378    lp_exec_mask_function_init(mask, mask->function_stack_size);
379    mask->function_stack[mask->function_stack_size].pc = *pc;
380    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
381    mask->function_stack_size++;
/* Handle a TGSI RET: disable the currently-active lanes in the return mask
 * so they skip the rest of the function.  A top-level RET outside any
 * control flow returns from main directly (elided early-out path). */
385 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
387    LLVMBuilderRef builder = mask->bld->gallivm->builder;
388    struct function_ctx *ctx = func_ctx(mask);
389    LLVMValueRef exec_mask;
391    if (ctx->cond_stack_size == 0 &&
392        ctx->loop_stack_size == 0 &&
393        ctx->switch_stack_size == 0 &&
394        mask->function_stack_size == 1) {
395       /* returning from main() */
400    if (mask->function_stack_size == 1) {
402        * This requires special handling since we need to ensure
403        * we don't drop the mask even if we have no call stack
404        * (e.g. after a ret in a if clause after the endif)
406       mask->ret_in_main = TRUE;
/* ret_mask &= ~current_exec_mask: lanes that returned stay disabled. */
409    exec_mask = LLVMBuildNot(builder,
413    mask->ret_mask = LLVMBuildAnd(builder,
415                                  exec_mask, "ret_full");
417    lp_exec_mask_update(mask);
/* Handle a TGSI BGNSUB (subroutine entry) — body elided in this listing;
 * presumably a no-op or minimal bookkeeping. TODO confirm against full file. */
420 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
/* Handle a TGSI ENDSUB: pop the current function frame and restore the
 * caller's return mask. */
424 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
426    struct function_ctx *ctx;
/* Must never pop the implicit main() frame (index 0). */
428    assert(mask->function_stack_size > 1);
429    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
431    ctx = func_ctx(mask);
432    mask->function_stack_size--;
435    mask->ret_mask = ctx->ret_mask;
437    lp_exec_mask_update(mask);
/* Return an LLVM pointer to one channel of a TEMPORARY or OUTPUT register.
 * For indirectly-addressed files the storage is a flat alloca array indexed
 * as index*4+chan; otherwise per-register/per-channel variables are used. */
442 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
447    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
448    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
449    LLVMValueRef var_of_array;
452    case TGSI_FILE_TEMPORARY:
453       array_of_vars = bld->temps;
454       var_of_array = bld->temps_array;
456    case TGSI_FILE_OUTPUT:
457       array_of_vars = bld->outputs;
458       var_of_array = bld->outputs_array;
/* Indirectly addressed file: compute a GEP into the flat array. */
467    if (bld->indirect_files & (1 << file)) {
468       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
/* Array-typed alloca needs a two-index GEP ({0, lindex}). */
469       if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
471          gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
473          return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
475          return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
479       assert(index <= bld->bld_base.info->file_max[file]);
480       return array_of_vars[index][chan];
486  * Return pointer to a temporary register channel (src or dest).
487  * Note that indirect addressing cannot be handled here.
488  * \param index  which temporary register
489  * \param chan  which channel of the temp register.
/* Thin wrapper over get_file_ptr() for the TEMPORARY register file. */
492 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
496    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
500  * Return pointer to a output register channel (src or dest).
501  * Note that indirect addressing cannot be handled here.
502  * \param index  which output register
503  * \param chan  which channel of the output register.
/* Thin wrapper over get_file_ptr() for the OUTPUT register file. */
506 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
510    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
514  * If we have indirect addressing in outputs copy our alloca array
515  * to the outputs slots specified by the caller to make sure
516  * our outputs are delivered consistently via the same interface.
519 gather_outputs(struct lp_build_tgsi_soa_context * bld)
/* Only needed when outputs live in the flat indirect-addressing array. */
521    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
522       unsigned index, chan;
523       assert(bld->bld_base.info->num_outputs <=
524              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
/* Repoint each per-register/per-channel output slot at the array element. */
525       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
526          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
527             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
535  * XXX the lp_build_gather() function should be capable of doing this
536  * with a little work.
/* Gather scalars from base_ptr at per-lane indexes into a result vector.
 * overflow_mask (optional) clamps out-of-bounds constant-buffer reads to 0.
 * indexes2 (optional) provides the second half of a 64-bit (split-channel)
 * fetch, interleaved into a double-length float vector. */
539 build_gather(struct lp_build_tgsi_context *bld_base,
540              LLVMValueRef base_ptr,
541              LLVMValueRef indexes,
542              LLVMValueRef overflow_mask,
543              LLVMValueRef indexes2)
545    struct gallivm_state *gallivm = bld_base->base.gallivm;
546    LLVMBuilderRef builder = gallivm->builder;
547    struct lp_build_context *uint_bld = &bld_base->uint_bld;
548    struct lp_build_context *bld = &bld_base->base;
/* 64-bit fetch builds a float vector of twice the native length. */
553       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
557        * overflow_mask is a vector telling us which channels
558        * in the vector overflowed. We use the overflow behavior for
559        * constant buffers which is defined as:
560        * Out of bounds access to constant buffer returns 0 in all
561        * components. Out of bounds behavior is always with respect
562        * to the size of the buffer bound at that slot.
567        * We avoid per-element control flow here (also due to llvm going crazy,
568        * though I suspect it's better anyway since overflow is likely rare).
569        * Note that since we still fetch from buffers even if num_elements was
570        * zero (in this case we'll fetch from index zero) the jit func callers
571        * MUST provide valid fake constant buffers of size 4x32 (the values do
572        * not matter), otherwise we'd still need (not per element though)
/* Redirect overflowing lanes to index 0; result is zeroed afterwards. */
575       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
577          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
581     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
583    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
586       LLVMValueRef scalar_ptr, scalar;
588       di = lp_build_const_int32(bld->gallivm, i);
/* For split 64-bit fetches, lanes alternate between the two index vecs. */
590          si = lp_build_const_int32(bld->gallivm, i >> 1);
594       if (indexes2 && (i & 1)) {
595          index = LLVMBuildExtractElement(builder,
598          index = LLVMBuildExtractElement(builder,
601       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
602                                 &index, 1, "gather_ptr");
603       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
605       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
/* Zero overflowed lanes: for 64-bit results select in the double-wide
 * domain (sign-extend the mask first), otherwise in the float domain. */
610       res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
611       overflow_mask = LLVMBuildSExt(builder, overflow_mask,
612                                     bld_base->dbl_bld.int_vec_type, "");
613       res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
614                             bld_base->dbl_bld.zero, res);
616       res = lp_build_select(bld, overflow_mask, bld->zero, res);
624  * Scatter/store vector.
/* Store each lane of 'values' to base_ptr[indexes[lane]], honoring the
 * execution mask: masked-off lanes keep the destination's old value via a
 * load/select/store sequence. */
627 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
628                   LLVMValueRef base_ptr,
629                   LLVMValueRef indexes,
631                   struct lp_exec_mask *mask)
633    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
634    LLVMBuilderRef builder = gallivm->builder;
/* No active mask means every lane stores unconditionally. */
636    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
639     * Loop over elements of index_vec, store scalar value.
641    for (i = 0; i < bld->bld_base.base.type.length; i++) {
642       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
643       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
644       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
645       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
646       LLVMValueRef scalar_pred = pred ?
647          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
/* Debug path (elided #if): trace each scatter at runtime. */
650          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
651                          ii, val, index, scalar_ptr);
/* Predicated store: read-modify-write so masked lanes are preserved. */
654          LLVMValueRef real_val, dst_val;
655          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
656          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
657          LLVMBuildStore(builder, real_val, scalar_ptr);
660          LLVMBuildStore(builder, val, scalar_ptr);
667  * Read the current value of the ADDR register, convert the floats to
668  * ints, add the base index and return the vector of offsets.
669  * The offsets will be used to index into the constant buffer or
670  * temporary register file.
673 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
674                    unsigned reg_file, unsigned reg_index,
675                    const struct tgsi_ind_register *indirect_reg,
678    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
679    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
680    /* always use X component of address register */
681    unsigned swizzle = indirect_reg->Swizzle;
684    LLVMValueRef max_index;
687    assert(bld->indirect_files & (1 << reg_file));
/* Splat the static register index; the relative part is added per-lane. */
689    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
692    switch (indirect_reg->File) {
693    case TGSI_FILE_ADDRESS:
694       rel = LLVMBuildLoad(builder,
695                           bld->addr[indirect_reg->Index][swizzle],
697       /* ADDR LLVM values already have LLVM integer type. */
699    case TGSI_FILE_TEMPORARY:
700       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
701       rel = LLVMBuildLoad(builder, rel, "load temp reg");
702       /* TEMP LLVM values always have LLVM float type, but for indirection, the
703        * value actually stored is expected to be an integer */
704       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
/* Unknown indirect file: fall back to a zero relative offset. */
708       rel = uint_bld->zero;
711    index = lp_build_add(uint_bld, base, rel);
714     * emit_fetch_constant handles constant buffer overflow so this code
715     * is pointless for them.
716     * Furthermore the D3D10 spec in section 6.5 says:
717     * If the constant buffer bound to a slot is larger than the size
718     * declared in the shader for that slot, implementations are allowed
719     * to return incorrect data (not necessarily 0) for indices that are
720     * larger than the declared size but smaller than the buffer size.
/* Clamp to the declared file maximum for every file except constants. */
722    if (reg_file != TGSI_FILE_CONSTANT) {
723       assert(index_limit >= 0);
724       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
725                                          uint_bld->type, index_limit);
727       assert(!uint_bld->type.sign);
728       index = lp_build_min(uint_bld, index, max_index);
/* Map a TGSI source/destination data type to the lp_build_context whose
 * vec_type should be used when fetching/bitcasting values of that type. */
734 static struct lp_build_context *
735 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
736                enum tgsi_opcode_type stype)
738    struct lp_build_context *bld_fetch;
/* UNTYPED is treated as float (the default SoA representation). */
741    case TGSI_TYPE_FLOAT:
742    case TGSI_TYPE_UNTYPED:
743       bld_fetch = &bld_base->base;
745    case TGSI_TYPE_UNSIGNED:
746       bld_fetch = &bld_base->uint_bld;
748    case TGSI_TYPE_SIGNED:
749       bld_fetch = &bld_base->int_bld;
751    case TGSI_TYPE_DOUBLE:
752       bld_fetch = &bld_base->dbl_bld;
754    case TGSI_TYPE_UNSIGNED64:
755       bld_fetch = &bld_base->uint64_bld;
757    case TGSI_TYPE_SIGNED64:
758       bld_fetch = &bld_base->int64_bld;
/* Compute per-lane element offsets into a flat SoA register array:
 * (indirect_index * 4 + chan_index) * vector_length [+ lane number when
 * need_perelement_offset is set, so each lane reads its own slot]. */
770 get_soa_array_offsets(struct lp_build_context *uint_bld,
771                       LLVMValueRef indirect_index,
773                       boolean need_perelement_offset)
775    struct gallivm_state *gallivm = uint_bld->gallivm;
776    LLVMValueRef chan_vec =
777       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
778    LLVMValueRef length_vec =
779       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
780    LLVMValueRef index_vec;
782    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
783    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
784    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
785    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
787    if (need_perelement_offset) {
788       LLVMValueRef pixel_offsets;
790       /* build pixel offset vector: {0, 1, 2, 3, ...} */
791       pixel_offsets = uint_bld->undef;
792       for (i = 0; i < uint_bld->type.length; i++) {
793          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
794          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
797       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
/* Fetch one channel of a CONSTANT-file source register as a vector.
 * Handles 2D constant buffers (Dimension), indirect addressing with D3D10
 * out-of-bounds-returns-zero semantics, 64-bit (split-channel) types, and
 * final bitcast to the requested stype.  (Function name line elided in
 * this listing — this is the constant-register fetch callback.) */
804    struct lp_build_tgsi_context * bld_base,
805    const struct tgsi_full_src_register * reg,
806    enum tgsi_opcode_type stype,
809    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
810    struct gallivm_state *gallivm = bld_base->base.gallivm;
811    LLVMBuilderRef builder = gallivm->builder;
812    struct lp_build_context *uint_bld = &bld_base->uint_bld;
813    unsigned dimension = 0;
814    LLVMValueRef consts_ptr;
815    LLVMValueRef num_consts;
/* Low 16 bits select the first channel; high 16 bits (for 64-bit types)
 * select the second channel of the pair. */
817    unsigned swizzle = swizzle_in & 0xffff;
819    /* XXX: Handle fetching xyzw components as a vector */
820    assert(swizzle != ~0u);
/* 2D constant register: Dimension.Index picks the constant buffer slot. */
822    if (reg->Register.Dimension) {
823       assert(!reg->Dimension.Indirect);
824       dimension = reg->Dimension.Index;
825       assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
828    consts_ptr = bld->consts[dimension];
829    num_consts = bld->consts_sizes[dimension];
831    if (reg->Register.Indirect) {
832       LLVMValueRef indirect_index;
833       LLVMValueRef swizzle_vec =
834          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
835       LLVMValueRef index_vec;  /* index into the const buffer */
836       LLVMValueRef overflow_mask;
837       LLVMValueRef index_vec2 = NULL;
839       indirect_index = get_indirect_index(bld,
843                                           bld->bld_base.info->file_max[reg->Register.File]);
845       /* All fetches are from the same constant buffer, so
846        * we need to propagate the size to a vector to do a
847        * vector comparison */
848       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
849       /* Construct a boolean vector telling us which channels
850        * overflow the bound constant buffer */
851       overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
852                                        indirect_index, num_consts);
854       /* index_vec = indirect_index * 4 + swizzle */
855       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
856       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
/* 64-bit values need a second set of indexes for the paired channel. */
858       if (tgsi_type_is_64bit(stype)) {
859          LLVMValueRef swizzle_vec2;
860          swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
861          index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
862          index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
864       /* Gather values from the constant buffer */
865       res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
/* Direct (non-indirect) access: load one scalar and broadcast it. */
868       LLVMValueRef index;  /* index into the const buffer */
869       LLVMValueRef scalar, scalar_ptr;
870       struct lp_build_context *bld_broad = &bld_base->base;
871       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
873       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
/* Non-adjacent 64-bit channel pair: load both halves separately and
 * assemble them into a two-element vector (widened by elided code). */
876       if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
878          LLVMValueRef scalar2, scalar2_ptr;
879          LLVMValueRef shuffles[2];
880          index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
882          scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
885          scalar = LLVMBuildLoad(builder, scalar_ptr, "");
886          scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
887          shuffles[0] = lp_build_const_int32(gallivm, 0);
888          shuffles[1] = lp_build_const_int32(gallivm, 1);
890          res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
891          res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
892          res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
/* Adjacent pair / scalar types: recast the pointer to the 64-bit element
 * type so a single load reads the whole value, then broadcast. */
894          if (stype == TGSI_TYPE_DOUBLE) {
895             LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
896             scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
897             bld_broad = &bld_base->dbl_bld;
898          } else if (stype == TGSI_TYPE_UNSIGNED64) {
899             LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
900             scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
901             bld_broad = &bld_base->uint64_bld;
902          } else if (stype == TGSI_TYPE_SIGNED64) {
903             LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
904             scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
905             bld_broad = &bld_base->int64_bld;
907          scalar = LLVMBuildLoad(builder, scalar_ptr, "");
908          res = lp_build_broadcast_scalar(bld_broad, scalar);
/* Reinterpret the gathered float vector as the requested element type. */
913    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
914       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
915       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
922  * Fetch 64-bit values from two separate channels.
923  * 64-bit values are stored split across two channels, like xy and zw.
924  * This function creates a set of vec_length*2 floats,
925  * extracts the values from the two channels,
926  * puts them in the correct place, then casts to vec_length 64-bits.
/* (Signature line elided: takes the two source float vectors, here named
 * input/input2, plus bld_base and the target stype.) */
930    struct lp_build_tgsi_context * bld_base,
931    enum tgsi_opcode_type stype,
935    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
936    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
937    LLVMBuilderRef builder = gallivm->builder;
939    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
941    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
942    int len = bld_base->base.type.length * 2;
943    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
/* Interleave: result lane pairs are (input[i], input2[i]) — low/high
 * 32-bit halves of each 64-bit value. */
945    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
946       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
947       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
949    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
/* Reinterpret 2N floats as N 64-bit elements of the requested type. */
951    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
/* Fetch one channel of an IMMEDIATE-file source register.  Immediates live
 * either in per-register LLVM values or, when indirect addressing is used
 * (or use_immediates_array is set), in a flat float array that is gathered
 * from.  Handles 64-bit split-channel types and the final type bitcast. */
955 emit_fetch_immediate(
956    struct lp_build_tgsi_context * bld_base,
957    const struct tgsi_full_src_register * reg,
958    enum tgsi_opcode_type stype,
961    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
962    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
963    LLVMBuilderRef builder = gallivm->builder;
964    LLVMValueRef res = NULL;
965    unsigned swizzle = swizzle_in & 0xffff;
967    if (bld->use_immediates_array || reg->Register.Indirect) {
968       LLVMValueRef imms_array;
969       LLVMTypeRef fptr_type;
971       /* cast imms_array pointer to float* */
972       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
973       imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
975       if (reg->Register.Indirect) {
976          LLVMValueRef indirect_index;
977          LLVMValueRef index_vec;  /* index into the immediate register array */
978          LLVMValueRef index_vec2 = NULL;
979          indirect_index = get_indirect_index(bld,
983                                              bld->bld_base.info->file_max[reg->Register.File]);
985           * Unlike for other reg classes, adding pixel offsets is unnecessary -
986           * immediates are stored as full vectors (FIXME??? - might be better
987           * to store them the same as constants) but all elements are the same
990          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
/* Second index vector selects the paired channel of a 64-bit value. */
994          if (tgsi_type_is_64bit(stype))
995             index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
999          /* Gather values from the immediate register array */
1000          res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
/* Direct access into the flat array: plain GEP + load. */
1002          LLVMValueRef gep[2];
1003          gep[0] = lp_build_const_int32(gallivm, 0);
1004          gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1005          LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1006                                               bld->imms_array, gep, 2, "");
1007          res = LLVMBuildLoad(builder, imms_ptr, "");
/* 64-bit type: load the second channel and merge the two halves. */
1009          if (tgsi_type_is_64bit(stype)) {
1010             LLVMValueRef imms_ptr2;
1012             gep[1] = lp_build_const_int32(gallivm,
1013                                           reg->Register.Index * 4 + (swizzle_in >> 16));
1014             imms_ptr2 = LLVMBuildGEP(builder,
1015                                      bld->imms_array, gep, 2, "");
1016             res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1017             res = emit_fetch_64bit(bld_base, stype, res, res2);
/* Common case: immediates kept as individual per-channel LLVM values. */
1022       res = bld->immediates[reg->Register.Index][swizzle];
1023       if (tgsi_type_is_64bit(stype))
1024          res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
/* Reinterpret the float result as the requested element type. */
1027    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1028       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1029       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
/* Fetch one channel of an INPUT-file source register: indirect addressing
 * gathers from the flat inputs array; direct addressing either loads from
 * the array (when inputs are indirect-addressed) or uses the pre-loaded
 * per-channel input values.  Handles 64-bit types and the type bitcast.
 * (Function name line elided in this listing — input fetch callback.) */
1036    struct lp_build_tgsi_context * bld_base,
1037    const struct tgsi_full_src_register * reg,
1038    enum tgsi_opcode_type stype,
1039    unsigned swizzle_in)
1041    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1042    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1043    LLVMBuilderRef builder = gallivm->builder;
1045    unsigned swizzle = swizzle_in & 0xffff;
1047    if (reg->Register.Indirect) {
1048       LLVMValueRef indirect_index;
1049       LLVMValueRef index_vec;  /* index into the input reg array */
1050       LLVMValueRef index_vec2 = NULL;
1051       LLVMValueRef inputs_array;
1052       LLVMTypeRef fptr_type;
1054       indirect_index = get_indirect_index(bld,
1056                                           reg->Register.Index,
1058                                           bld->bld_base.info->file_max[reg->Register.File]);
1060       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
/* 64-bit type: second index vector for the paired channel. */
1064       if (tgsi_type_is_64bit(stype)) {
1065          index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1070       /* cast inputs_array pointer to float* */
1071       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1072       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1074       /* Gather values from the input register array */
1075       res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
/* Direct access, but inputs stored in the flat indirect array. */
1077       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1078          LLVMValueRef lindex = lp_build_const_int32(gallivm,
1079                                                     reg->Register.Index * 4 + swizzle);
1080          LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1081                                                bld->inputs_array, &lindex, 1, "");
1083          res = LLVMBuildLoad(builder, input_ptr, "");
1084          if (tgsi_type_is_64bit(stype)) {
1085             LLVMValueRef lindex1;
1086             LLVMValueRef input_ptr2;
1089             lindex1 = lp_build_const_int32(gallivm,
1090                                            reg->Register.Index * 4 + (swizzle_in >> 16));
1091             input_ptr2 = LLVMBuildGEP(builder,
1092                                       bld->inputs_array, &lindex1, 1, "");
1093             res2 = LLVMBuildLoad(builder, input_ptr2, "");
1094             res = emit_fetch_64bit(bld_base, stype, res, res2);
/* Common case: use the pre-loaded per-register/per-channel value. */
1098          res = bld->inputs[reg->Register.Index][swizzle];
1099          if (tgsi_type_is_64bit(stype))
1100             res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
/* Reinterpret the float result as the requested element type. */
1106    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1107       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1108       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
/*
 * Fetch one channel of a geometry shader input via the gs_iface
 * callback.  PRIMID is special-cased as a system value.  For 64-bit
 * types a second fetch (high 16 bits of swizzle_in) supplies the
 * upper half.
 */
1116 emit_fetch_gs_input(
1117 struct lp_build_tgsi_context * bld_base,
1118 const struct tgsi_full_src_register * reg,
1119 enum tgsi_opcode_type stype,
1120 unsigned swizzle_in)
1122 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1123 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1124 const struct tgsi_shader_info *info = bld->bld_base.info;
1125 LLVMBuilderRef builder = gallivm->builder;
1126 LLVMValueRef attrib_index = NULL;
1127 LLVMValueRef vertex_index = NULL;
1128 unsigned swizzle = swizzle_in & 0xffff;
1129 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1132 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1133 /* This is really a system value not a regular input */
1134 assert(!reg->Register.Indirect);
1135 assert(!reg->Dimension.Indirect);
1136 res = bld->system_values.prim_id;
1137 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1138 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
/* Attribute index: clamp indirect indexing against file_max. */
1143 if (reg->Register.Indirect) {
1145 * XXX: this is possibly not quite the right value, since file_max may be
1146 * larger than the max attrib index, due to it being the max of declared
1147 * inputs AND the max vertices per prim (which is 6 for tri adj).
1148 * It should however be safe to use (since we always allocate
1149 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1151 int index_limit = info->file_max[reg->Register.File];
1152 attrib_index = get_indirect_index(bld,
1154 reg->Register.Index,
1158 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
/* Vertex index (Dimension): limit is the prim's vertex count. */
1161 if (reg->Dimension.Indirect) {
1163 * A fixed 6 should do as well (which is what we allocate).
1165 int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1166 vertex_index = get_indirect_index(bld,
1168 reg->Dimension.Index,
1172 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1175 res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1176 reg->Dimension.Indirect,
1178 reg->Register.Indirect,
/* Second fetch for the high channel of 64-bit values. */
1183 if (tgsi_type_is_64bit(stype)) {
1184 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1186 res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1187 reg->Dimension.Indirect,
1189 reg->Register.Indirect,
1193 res = emit_fetch_64bit(bld_base, stype, res, res2);
1194 } else if (stype == TGSI_TYPE_UNSIGNED) {
1195 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1196 } else if (stype == TGSI_TYPE_SIGNED) {
1197 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
/*
 * Fetch one channel of a tessellation control shader input via the
 * tcs_iface callbacks.  A TCS may also read back its own outputs
 * (TGSI_FILE_OUTPUT), which routes through emit_fetch_output instead
 * of emit_fetch_input.  PRIMID is special-cased as a system value.
 */
1204 emit_fetch_tcs_input(
1205 struct lp_build_tgsi_context * bld_base,
1206 const struct tgsi_full_src_register * reg,
1207 enum tgsi_opcode_type stype,
1208 unsigned swizzle_in)
1210 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1211 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1212 const struct tgsi_shader_info *info = bld->bld_base.info;
1213 LLVMBuilderRef builder = gallivm->builder;
1214 LLVMValueRef attrib_index = NULL;
1215 LLVMValueRef vertex_index = NULL;
1216 unsigned swizzle = swizzle_in & 0xffff;
1217 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1220 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1221 /* This is really a system value not a regular input */
1222 assert(!reg->Register.Indirect);
1223 assert(!reg->Dimension.Indirect);
1224 res = bld->system_values.prim_id;
1225 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1226 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
/* Attribute index (possibly indirect, clamped to file_max). */
1231 if (reg->Register.Indirect) {
1232 int index_limit = info->file_max[reg->Register.File];
1233 attrib_index = get_indirect_index(bld,
1235 reg->Register.Index,
1239 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
/* Vertex index within the patch (Dimension register). */
1242 if (reg->Dimension.Indirect) {
1243 vertex_index = get_indirect_index(bld,
1245 reg->Dimension.Index,
1247 PIPE_MAX_SHADER_INPUTS);
1249 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1252 // TCS can read from its own outputs
1253 if (reg->Register.File == TGSI_FILE_OUTPUT) {
1254 res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1255 reg->Dimension.Indirect,
1257 reg->Register.Indirect,
1261 bld_base->info->output_semantic_name[reg->Register.Index]);
1263 res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1264 reg->Dimension.Indirect,
1266 reg->Register.Indirect,
/* Second fetch for the high channel of 64-bit values. */
1274 if (tgsi_type_is_64bit(stype)) {
1275 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1277 if (reg->Register.File == TGSI_FILE_OUTPUT) {
1278 res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1279 reg->Dimension.Indirect,
1281 reg->Register.Indirect,
1285 bld_base->info->output_semantic_name[reg->Register.Index]);
1287 res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1288 reg->Dimension.Indirect,
1290 reg->Register.Indirect,
1296 res = emit_fetch_64bit(bld_base, stype, res, res2);
1297 } else if (stype == TGSI_TYPE_UNSIGNED) {
1298 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1299 } else if (stype == TGSI_TYPE_SIGNED) {
1300 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
/*
 * Fetch one channel of a tessellation evaluation shader input via the
 * tes_iface callbacks.  PATCH-semantic inputs use fetch_patch_input
 * (no vertex dimension); everything else uses fetch_vertex_input.
 * PRIMID is special-cased as a system value.
 */
1307 emit_fetch_tes_input(
1308 struct lp_build_tgsi_context * bld_base,
1309 const struct tgsi_full_src_register * reg,
1310 enum tgsi_opcode_type stype,
1311 unsigned swizzle_in)
1313 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1314 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1315 const struct tgsi_shader_info *info = bld->bld_base.info;
1316 LLVMBuilderRef builder = gallivm->builder;
1317 LLVMValueRef attrib_index = NULL;
1318 LLVMValueRef vertex_index = NULL;
1319 unsigned swizzle = swizzle_in & 0xffff;
1320 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1323 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1324 /* This is really a system value not a regular input */
1325 assert(!reg->Register.Indirect);
1326 assert(!reg->Dimension.Indirect);
1327 res = bld->system_values.prim_id;
1328 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1329 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
/* Attribute index (possibly indirect, clamped to file_max). */
1334 if (reg->Register.Indirect) {
1335 int index_limit = info->file_max[reg->Register.File];
1336 attrib_index = get_indirect_index(bld,
1338 reg->Register.Index,
1342 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
/* Vertex index within the patch (Dimension register). */
1345 if (reg->Dimension.Indirect) {
1346 vertex_index = get_indirect_index(bld,
1348 reg->Dimension.Index,
1350 PIPE_MAX_SHADER_INPUTS);
1352 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
/* Per-patch inputs have no vertex dimension. */
1355 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1356 res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1357 reg->Register.Indirect,
1361 res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1362 reg->Dimension.Indirect,
1364 reg->Register.Indirect,
/* Second fetch for the high channel of 64-bit values. */
1371 if (tgsi_type_is_64bit(stype)) {
1372 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1374 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1375 res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1376 reg->Register.Indirect,
1381 res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1382 reg->Dimension.Indirect,
1384 reg->Register.Indirect,
1390 res = emit_fetch_64bit(bld_base, stype, res, res2);
1391 } else if (stype == TGSI_TYPE_UNSIGNED) {
1392 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1393 } else if (stype == TGSI_TYPE_SIGNED) {
1394 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
/*
 * Fetch one channel of a temporary register (TGSI_FILE_TEMPORARY).
 * Indirect access gathers from the temps memory array; direct access
 * loads from the per-channel alloca.  64-bit types combine two float
 * channels (low 16 / high 16 bits of swizzle_in).
 */
1403 emit_fetch_temporary(
1404 struct lp_build_tgsi_context * bld_base,
1405 const struct tgsi_full_src_register * reg,
1406 enum tgsi_opcode_type stype,
1407 unsigned swizzle_in)
1409 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1410 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1411 LLVMBuilderRef builder = gallivm->builder;
1413 unsigned swizzle = swizzle_in & 0xffff;
/* Indirectly addressed temp: per-lane gather from the temps array. */
1415 if (reg->Register.Indirect) {
1416 LLVMValueRef indirect_index;
1417 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1418 LLVMValueRef temps_array;
1419 LLVMTypeRef fptr_type;
1421 indirect_index = get_indirect_index(bld,
1423 reg->Register.Index,
1425 bld->bld_base.info->file_max[reg->Register.File]);
1427 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
/* 64-bit values need a second offset vector for the high channel. */
1431 if (tgsi_type_is_64bit(stype)) {
1432 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1438 /* cast temps_array pointer to float* */
1439 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1440 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1442 /* Gather values from the temporary register array */
1443 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
/* Direct access: simple load from the temp's channel pointer. */
1446 LLVMValueRef temp_ptr;
1447 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1448 res = LLVMBuildLoad(builder, temp_ptr, "");
1450 if (tgsi_type_is_64bit(stype)) {
1451 LLVMValueRef temp_ptr2, res2;
1453 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1454 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1455 res = emit_fetch_64bit(bld_base, stype, res, res2);
/* Temps are stored as floats; bitcast to the requested type. */
1459 if (stype == TGSI_TYPE_SIGNED ||
1460 stype == TGSI_TYPE_UNSIGNED ||
1461 stype == TGSI_TYPE_DOUBLE ||
1462 stype == TGSI_TYPE_SIGNED64 ||
1463 stype == TGSI_TYPE_UNSIGNED64) {
1464 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1465 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
/*
 * Fetch a system value register (TGSI_FILE_SYSTEM_VALUE).  Scalar
 * system values are broadcast across the SoA vector; vector-valued
 * ones (thread id, block id, grid size, tess factors/coord) are
 * indexed by the swizzle.  The value's actual type (atype) is tracked
 * so it can be bitcast to the type the instruction expects (stype).
 */
1472 emit_fetch_system_value(
1473 struct lp_build_tgsi_context * bld_base,
1474 const struct tgsi_full_src_register * reg,
1475 enum tgsi_opcode_type stype,
1476 unsigned swizzle_in)
1478 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1479 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1480 const struct tgsi_shader_info *info = bld->bld_base.info;
1481 LLVMBuilderRef builder = gallivm->builder;
1483 enum tgsi_opcode_type atype; // Actual type of the value
1484 unsigned swizzle = swizzle_in & 0xffff;
/* Indirect addressing of system values is not supported. */
1486 assert(!reg->Register.Indirect);
1488 switch (info->system_value_semantic_name[reg->Register.Index]) {
1489 case TGSI_SEMANTIC_INSTANCEID:
1490 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1491 atype = TGSI_TYPE_UNSIGNED;
1494 case TGSI_SEMANTIC_VERTEXID:
1495 res = bld->system_values.vertex_id;
1496 atype = TGSI_TYPE_UNSIGNED;
1499 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1500 res = bld->system_values.vertex_id_nobase;
1501 atype = TGSI_TYPE_UNSIGNED;
1504 case TGSI_SEMANTIC_BASEVERTEX:
1505 res = bld->system_values.basevertex;
1506 atype = TGSI_TYPE_UNSIGNED;
1509 case TGSI_SEMANTIC_BASEINSTANCE:
1510 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1511 atype = TGSI_TYPE_UNSIGNED;
1514 case TGSI_SEMANTIC_PRIMID:
1515 res = bld->system_values.prim_id;
1516 atype = TGSI_TYPE_UNSIGNED;
/* In TCS the invocation id is already a vector; elsewhere broadcast. */
1519 case TGSI_SEMANTIC_INVOCATIONID:
1520 if (info->processor == PIPE_SHADER_TESS_CTRL)
1521 res = bld->system_values.invocation_id;
1523 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1524 atype = TGSI_TYPE_UNSIGNED;
/* Helper invocations are the lanes masked off by the exec mask. */
1527 case TGSI_SEMANTIC_HELPER_INVOCATION:
1528 res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1529 atype = TGSI_TYPE_UNSIGNED;
1532 case TGSI_SEMANTIC_THREAD_ID:
1533 res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1534 atype = TGSI_TYPE_UNSIGNED;
1537 case TGSI_SEMANTIC_BLOCK_ID:
1538 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1539 atype = TGSI_TYPE_UNSIGNED;
1542 case TGSI_SEMANTIC_GRID_SIZE:
1543 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1544 atype = TGSI_TYPE_UNSIGNED;
/* Tess coord lives in a memory array; load the swizzled element. */
1547 case TGSI_SEMANTIC_TESSCOORD:
1549 LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1550 LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1551 res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
1553 atype = TGSI_TYPE_FLOAT;
1556 case TGSI_SEMANTIC_FACE:
1557 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1558 atype = TGSI_TYPE_UNSIGNED;
1561 case TGSI_SEMANTIC_DRAWID:
1562 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1563 atype = TGSI_TYPE_UNSIGNED;
1566 case TGSI_SEMANTIC_SAMPLEID:
1567 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1568 atype = TGSI_TYPE_UNSIGNED;
1571 case TGSI_SEMANTIC_TESSOUTER:
1572 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1573 bld->system_values.tess_outer,
1574 lp_build_const_int32(gallivm, swizzle_in));
1575 atype = TGSI_TYPE_FLOAT;
1578 case TGSI_SEMANTIC_TESSINNER:
1579 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1580 bld->system_values.tess_inner,
1581 lp_build_const_int32(gallivm, swizzle_in));
1582 atype = TGSI_TYPE_FLOAT;
1585 case TGSI_SEMANTIC_VERTICESIN:
1586 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1587 atype = TGSI_TYPE_UNSIGNED;
1591 assert(!"unexpected semantic in emit_fetch_system_value");
1592 res = bld_base->base.zero;
1593 atype = TGSI_TYPE_FLOAT;
/* Reconcile the value's actual type with the requested type. */
1597 if (atype != stype) {
1598 if (stype == TGSI_TYPE_FLOAT) {
1599 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1600 } else if (stype == TGSI_TYPE_UNSIGNED) {
1601 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1602 } else if (stype == TGSI_TYPE_SIGNED) {
1603 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1611 * Register fetch with derivatives.
/* NOTE(review): signature elided in this excerpt — presumably
 * emit_fetch_deriv: computes ddx/ddy screen-space derivatives of src
 * into the caller-provided out pointers; confirm against full file. */
1615 struct lp_build_tgsi_soa_context *bld,
1624 /* TODO: use interpolation coeffs for inputs */
1627 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1630 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1634 * store an array of vec-length 64-bit into two arrays of vec_length floats
1636 * value is d0, d1, d2, d3 etc.
1637 * each 64-bit has high and low pieces x, y
1638 * so gets stored into the separate channels as:
1639 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1640 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1643 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1644 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1647 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1648 struct gallivm_state *gallivm = bld_base->base.gallivm;
1649 LLVMBuilderRef builder = gallivm->builder;
1650 struct lp_build_context *float_bld = &bld_base->base;
1652 LLVMValueRef temp, temp2;
1653 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1654 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
/* Build shuffle masks selecting even (low) and odd (high) elements. */
1656 for (i = 0; i < bld_base->base.type.length; i++) {
1657 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1658 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
/* De-interleave the double-width vector into its two float halves. */
1661 temp = LLVMBuildShuffleVector(builder, value,
1662 LLVMGetUndef(LLVMTypeOf(value)),
1663 LLVMConstVector(shuffles,
1664 bld_base->base.type.length),
1666 temp2 = LLVMBuildShuffleVector(builder, value,
1667 LLVMGetUndef(LLVMTypeOf(value)),
1668 LLVMConstVector(shuffles2,
1669 bld_base->base.type.length),
/* Masked stores honour the current execution mask. */
1672 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1673 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
/*
 * Store one channel of a value into an output register
 * (TGSI_FILE_OUTPUT).  Indirect destinations scatter through the
 * outputs memory array; direct ones do a masked store to the output's
 * channel pointer (two pointers for 64-bit types).
 */
1677 emit_store_output(struct lp_build_tgsi_context *bld_base,
1678 enum tgsi_opcode_type dtype,
1679 const struct tgsi_full_dst_register *reg,
1681 unsigned chan_index,
1682 LLVMValueRef indirect_index,
1685 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1686 struct gallivm_state *gallivm = bld_base->base.gallivm;
1687 LLVMBuilderRef builder = gallivm->builder;
1688 struct lp_build_context *float_bld = &bld_base->base;
1690 /* Outputs are always stored as floats */
1691 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1693 if (reg->Register.Indirect) {
1694 LLVMValueRef index_vec; /* indexes into the output registers */
1695 LLVMValueRef outputs_array;
1696 LLVMTypeRef fptr_type;
1698 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
/* Cast the outputs array to float* for element-wise addressing. */
1703 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1704 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1706 /* Scatter store values into output registers */
1707 emit_mask_scatter(bld, outputs_array, index_vec, value,
/* Direct store: masked write to the output's channel alloca(s). */
1711 assert(LLVMTypeOf(value) == float_bld->vec_type);
1712 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1715 if (tgsi_type_is_64bit(dtype)) {
1716 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1718 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1721 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
/*
 * Store one channel of a tessellation control shader output by
 * delegating to the tcs_iface->emit_store_output callback, passing
 * attribute/vertex/channel indices (resolved from any indirect
 * addressing) and the current execution mask.
 */
1726 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1727 enum tgsi_opcode_type dtype,
1728 const struct tgsi_full_dst_register *reg,
1730 unsigned chan_index,
1731 LLVMValueRef indirect_index,
1734 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1735 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1736 const struct tgsi_shader_info *info = bld->bld_base.info;
1737 LLVMValueRef attrib_index = NULL;
1738 LLVMValueRef vertex_index = NULL;
1739 LLVMValueRef channel_index = NULL;
1741 if (reg->Register.Indirect) {
1743 * XXX: this is possibly not quite the right value, since file_max may be
1744 * larger than the max attrib index, due to it being the max of declared
1745 * inputs AND the max vertices per prim (which is 6 for tri adj).
1746 * It should however be safe to use (since we always allocate
1747 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1749 int index_limit = info->file_max[reg->Register.File];
1750 attrib_index = get_indirect_index(bld,
1752 reg->Register.Index,
1756 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
/* Vertex index within the patch (Dimension register). */
1759 if (reg->Dimension.Indirect) {
1760 vertex_index = get_indirect_index(bld,
1762 reg->Dimension.Index,
1764 PIPE_MAX_SHADER_OUTPUTS);
1766 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1769 channel_index = lp_build_const_int32(gallivm, chan_index);
1771 assert(bld->tcs_iface->emit_store_output);
1772 bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1773 bld_base->info->output_semantic_name[reg->Register.Index],
1774 reg->Dimension.Indirect,
1776 reg->Register.Indirect,
/* Pass the exec mask so inactive lanes are not written. */
1781 mask_vec(bld_base));
/*
 * Store one channel of a value into a temporary register
 * (TGSI_FILE_TEMPORARY).  Mirrors emit_store_output: indirect
 * destinations scatter into the temps memory array, direct ones do a
 * masked store (64-bit values split across two float channels).
 */
1785 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1786 enum tgsi_opcode_type dtype,
1787 const struct tgsi_full_dst_register *reg,
1789 unsigned chan_index,
1790 LLVMValueRef indirect_index,
1793 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1794 struct gallivm_state *gallivm = bld_base->base.gallivm;
1795 LLVMBuilderRef builder = gallivm->builder;
1796 struct lp_build_context *float_bld = &bld_base->base;
1798 /* Temporaries are always stored as floats */
1799 if (!tgsi_type_is_64bit(dtype))
1800 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
/* 64-bit values become a double-length float vector for the store. */
1802 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1804 if (reg->Register.Indirect) {
1805 LLVMValueRef index_vec; /* indexes into the temp registers */
1806 LLVMValueRef temps_array;
1807 LLVMTypeRef fptr_type;
1809 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
/* Cast the temps array to float* for element-wise addressing. */
1814 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1815 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1817 /* Scatter store values into temp registers */
1818 emit_mask_scatter(bld, temps_array, index_vec, value,
/* Direct store: masked write to the temp's channel alloca(s). */
1822 LLVMValueRef temp_ptr;
1823 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1825 if (tgsi_type_is_64bit(dtype)) {
1826 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1827 reg->Register.Index,
1829 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1833 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
/*
 * Store one channel into an address register (TGSI_FILE_ADDRESS).
 * Address registers are always signed integers; the store is masked
 * by the current execution mask.
 */
1838 emit_store_address(struct lp_build_tgsi_context *bld_base,
1839 enum tgsi_opcode_type dtype,
1840 const struct tgsi_full_dst_register *reg,
1842 unsigned chan_index,
1843 LLVMValueRef indirect_index,
1846 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1847 struct gallivm_state *gallivm = bld_base->base.gallivm;
1848 LLVMBuilderRef builder = gallivm->builder;
1849 struct lp_build_context *int_bld = &bld_base->int_bld;
1851 assert(dtype == TGSI_TYPE_SIGNED);
1852 assert(LLVMTypeOf(value) == int_bld->vec_type);
/* Bitcast is a no-op given the assert above; kept for uniformity. */
1853 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1854 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1855 bld->addr[reg->Register.Index][chan_index]);
/*
 * emit_store_chan (signature name line elided in this excerpt):
 * store one destination channel of an instruction result, applying
 * saturation if requested, resolving indirect addressing, and then
 * dispatching to the per-file store callback.
 */
1863 struct lp_build_tgsi_context *bld_base,
1864 const struct tgsi_full_instruction *inst,
1866 unsigned chan_index,
1869 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1870 struct gallivm_state *gallivm = bld_base->base.gallivm;
1871 LLVMBuilderRef builder = gallivm->builder;
1872 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1873 struct lp_build_context *float_bld = &bld_base->base;
1874 LLVMValueRef indirect_index = NULL;
1875 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
/* Saturation clamps to [0,1] (NaN -> 0) and only applies to floats. */
1880 * It is always assumed to be float.
1882 if (inst->Instruction.Saturate) {
1883 assert(dtype == TGSI_TYPE_FLOAT ||
1884 dtype == TGSI_TYPE_UNTYPED);
1885 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1886 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1889 if (reg->Register.Indirect) {
1891 * Currently the mesa/st doesn't generate indirect stores
1892 * to 64-bit values, it normally uses MOV to do indirect stores.
1894 assert(!tgsi_type_is_64bit(dtype));
1895 indirect_index = get_indirect_index(bld,
1897 reg->Register.Index,
1899 bld->bld_base.info->file_max[reg->Register.File]);
1901 assert(reg->Register.Index <=
1902 bld_base->info->file_max[reg->Register.File]);
1905 if (DEBUG_EXECUTION) {
1906 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
/* Dispatch to the register-file-specific store implementation. */
1909 assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1910 bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1922 * Called at the beginning of the translation of each TGSI instruction, to
1923 * emit some debug code.
/* (emit_debug — signature name line elided in this excerpt.) */
1927 struct lp_build_tgsi_context * bld_base,
1928 const struct tgsi_full_instruction * inst,
1929 const struct tgsi_opcode_info * info)
1932 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1934 if (DEBUG_EXECUTION) {
1936 * Dump the TGSI instruction.
1939 struct gallivm_state *gallivm = bld_base->base.gallivm;
/* Emit a runtime printf of the disassembled instruction. */
1943 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1944 lp_build_printf(gallivm, buf);
1946 /* Dump the execution mask.
1948 if (bld->exec_mask.has_mask) {
1949 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
/*
 * emit_store (signature name line elided in this excerpt): store all
 * written channels of an instruction result, honoring the write mask.
 * For 64-bit destinations only even channels (0/2) carry values, so
 * odd write-mask bits are skipped.
 */
1956 struct lp_build_tgsi_context * bld_base,
1957 const struct tgsi_full_instruction * inst,
1958 const struct tgsi_opcode_info * info,
1960 LLVMValueRef dst[4])
1963 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1965 unsigned writemask = inst->Dst[index].Register.WriteMask;
1967 unsigned chan_index = u_bit_scan(&writemask);
1968 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1970 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
/*
 * Map a TGSI texture target enum to the corresponding pipe texture
 * target.  Shadow and MSAA variants collapse onto the same pipe
 * target as their plain counterparts.
 */
1975 tgsi_to_pipe_tex_target(enum tgsi_texture_type tgsi_target)
1977 switch (tgsi_target) {
1978 case TGSI_TEXTURE_BUFFER:
1980 case TGSI_TEXTURE_1D:
1981 case TGSI_TEXTURE_SHADOW1D:
1982 return PIPE_TEXTURE_1D;
1983 case TGSI_TEXTURE_2D:
1984 case TGSI_TEXTURE_SHADOW2D:
1985 case TGSI_TEXTURE_2D_MSAA:
1986 return PIPE_TEXTURE_2D;
1987 case TGSI_TEXTURE_3D:
1988 return PIPE_TEXTURE_3D;
1989 case TGSI_TEXTURE_CUBE:
1990 case TGSI_TEXTURE_SHADOWCUBE:
1991 return PIPE_TEXTURE_CUBE;
1992 case TGSI_TEXTURE_RECT:
1993 case TGSI_TEXTURE_SHADOWRECT:
1994 return PIPE_TEXTURE_RECT;
1995 case TGSI_TEXTURE_1D_ARRAY:
1996 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1997 return PIPE_TEXTURE_1D_ARRAY;
1998 case TGSI_TEXTURE_2D_ARRAY:
1999 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2000 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2001 return PIPE_TEXTURE_2D_ARRAY;
2002 case TGSI_TEXTURE_CUBE_ARRAY:
2003 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2004 return PIPE_TEXTURE_CUBE_ARRAY;
/*
 * Decide how the LOD argument of a texture instruction varies across
 * the SoA vector: scalar (same for all lanes), per-quad, or
 * per-element.  A constant/immediate LOD source (or TEX_LZ, which
 * forces lod 0) is provably scalar; otherwise fragment shaders
 * default to per-quad (unless GALLIVM_PERF_NO_QUAD_LOD) and all other
 * shader stages use per-element.
 */
2012 static enum lp_sampler_lod_property
2013 lp_build_lod_property(
2014 struct lp_build_tgsi_context *bld_base,
2015 const struct tgsi_full_instruction *inst,
2018 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2019 enum lp_sampler_lod_property lod_property;
2022 * Not much we can do here. We could try catching inputs declared
2023 * with constant interpolation but not sure it's worth it - since for
2024 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2025 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2026 * like the constant/immediate recognition below.
2027 * What seems to be of more value would be to recognize temps holding
2028 * broadcasted scalars but no way we can do it.
2029 * Tried asking llvm but without any success (using LLVMIsConstant
2030 * even though this isn't exactly what we'd need), even as simple as
2031 * IMM[0] UINT32 (0,-1,0,0)
2032 * MOV TEMP[0] IMM[0].yyyy
2033 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2035 * This means there's ZERO chance this will ever catch a scalar lod
2036 * with traditional tex opcodes as well as texel fetches, since the lod
2037 * comes from the same reg as coords (except some test shaders using
2038 * constant coords maybe).
2039 * There's at least hope for sample opcodes as well as size queries.
2041 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2042 reg->Register.File == TGSI_FILE_CONSTANT ||
2043 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2044 lod_property = LP_SAMPLER_LOD_SCALAR;
2046 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2047 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2048 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2051 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2055 /* never use scalar (per-quad) lod the results are just too wrong. */
2056 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2058 return lod_property;
2063 * High-level instruction translators.
/*
 * Translate a TGSI texture sampling instruction (TEX/TXB/TXL/TXD/TXP,
 * TG4, ...) into a call to the sampler generator.  Builds the
 * coordinate array (coords[0..4]: coords, layer, shadow ref), the
 * sample_key describing lod/derivative/offset/shadow behavior, and
 * the lp_sampler_params, then delegates to sampler->emit_tex_sample.
 *
 * Fix: the memset of `params` had a mojibake-corrupted `&params`
 * ("¶ms", i.e. an HTML `&para;` entity eating the ampersand),
 * which does not compile; restored to `&params`.
 */
2067 emit_tex( struct lp_build_tgsi_soa_context *bld,
2068 const struct tgsi_full_instruction *inst,
2069 enum lp_build_tex_modifier modifier,
2070 LLVMValueRef *texel,
2071 unsigned sampler_reg,
2072 enum lp_sampler_op_type sampler_op)
2074 unsigned unit = inst->Src[sampler_reg].Register.Index;
2075 LLVMValueRef oow = NULL;
2076 LLVMValueRef lod = NULL;
2077 LLVMValueRef coords[5];
2078 LLVMValueRef offsets[3] = { NULL };
2079 struct lp_derivatives derivs;
2080 struct lp_sampler_params params;
2081 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2082 unsigned num_derivs, num_offsets, i;
2083 unsigned shadow_coord = 0;
2084 unsigned layer_coord = 0;
2085 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2087 memset(&params, 0, sizeof(params));
/* Without a sampler generator we can only return undef texels. */
2089 if (!bld->sampler) {
2090 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2091 for (i = 0; i < 4; i++) {
2092 texel[i] = bld->bld_base.base.undef;
/* Per-target coordinate layout: number of coords/derivs, and which
 * slots hold the layer and shadow-reference values. */
2097 switch (inst->Texture.Texture) {
2098 case TGSI_TEXTURE_1D_ARRAY:
2101 case TGSI_TEXTURE_1D:
2105 case TGSI_TEXTURE_2D_ARRAY:
2108 case TGSI_TEXTURE_2D:
2109 case TGSI_TEXTURE_RECT:
2113 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2116 case TGSI_TEXTURE_SHADOW1D:
2121 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2127 case TGSI_TEXTURE_SHADOW2D:
2128 case TGSI_TEXTURE_SHADOWRECT:
2133 case TGSI_TEXTURE_CUBE:
2137 case TGSI_TEXTURE_3D:
2141 case TGSI_TEXTURE_SHADOWCUBE:
2146 case TGSI_TEXTURE_CUBE_ARRAY:
2151 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2155 shadow_coord = 4; /* shadow coord special different reg */
2157 case TGSI_TEXTURE_2D_MSAA:
2158 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2164 /* Note lod and especially projected are illegal in a LOT of cases */
2165 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2166 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2167 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2168 lod = bld->bld_base.base.zero;
2169 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2170 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2171 /* note that shadow cube array with bias/explicit lod does not exist */
2172 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
/* Default: lod/bias lives in the .w channel of the coord register. */
2175 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2177 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2178 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2180 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2181 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2183 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
/* TG4: encode the component to gather in the sample key. */
2186 if (sampler_op == LP_SAMPLER_OP_GATHER) {
2187 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2188 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
/* Projected texturing: divide coords by .w via a reciprocal. */
2190 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2191 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2192 oow = lp_build_rcp(&bld->bld_base.base, oow);
2195 for (i = 0; i < num_derivs; i++) {
2196 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2197 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2198 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2200 for (i = num_derivs; i < 5; i++) {
2201 coords[i] = bld->bld_base.base.undef;
2204 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2206 if (layer_coord == 3) {
2207 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2210 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2212 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2213 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2215 /* Shadow coord occupies always 5th slot. */
2217 sample_key |= LP_SAMPLER_SHADOW;
2218 if (shadow_coord == 4) {
2219 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2222 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2224 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2225 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
/* TXD: explicit ddx/ddy from src regs 1 and 2. */
2228 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2230 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2231 for (dim = 0; dim < num_derivs; ++dim) {
2232 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2233 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2235 params.derivs = &derivs;
2237 * could also check all src regs if constant but I doubt such
2238 * cases exist in practice.
2240 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2241 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2242 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2245 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2249 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2252 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2254 /* we don't handle the 4 offset version of tg4 */
2255 if (inst->Texture.NumOffsets == 1) {
2257 sample_key |= LP_SAMPLER_OFFSETS;
2258 for (dim = 0; dim < num_offsets; dim++) {
2259 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
/* Assemble the parameter block and hand off to the sampler codegen. */
2263 params.type = bld->bld_base.base.type;
2264 params.sample_key = sample_key;
2265 params.texture_index = unit;
2266 params.sampler_index = unit;
2267 params.context_ptr = bld->context_ptr;
2268 params.thread_data_ptr = bld->thread_data_ptr;
2269 params.coords = coords;
2270 params.offsets = offsets;
2272 params.texel = texel;
2274 bld->sampler->emit_tex_sample(bld->sampler,
2275 bld->bld_base.base.gallivm,
/*
 * Emit code for SM4-style SAMPLE* opcodes (SAMPLE, SAMPLE_B/C/C_LZ/D/L,
 * GATHER4, LOD).  Unlike old-style tex opcodes, texture and sampler unit
 * come from src1/src2, and the texture target comes from the declared
 * sampler views rather than from the instruction itself.
 * Fix: "&params" had been mangled into the mojibake "¶ms" by an
 * HTML-entity round-trip; restored the correct token.
 * NOTE(review): this chunk is an elided view; some statements between the
 * visible lines are missing from this excerpt.
 */
2280 emit_sample(struct lp_build_tgsi_soa_context *bld,
2281 const struct tgsi_full_instruction *inst,
2282 enum lp_build_tex_modifier modifier,
2284 enum lp_sampler_op_type sample_type,
2285 LLVMValueRef *texel)
2287 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2288 unsigned texture_unit, sampler_unit;
2289 LLVMValueRef lod = NULL;
2290 LLVMValueRef coords[5];
2291 LLVMValueRef offsets[3] = { NULL };
2292 struct lp_derivatives derivs;
2293 struct lp_sampler_params params;
2294 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2296 unsigned num_offsets, num_derivs, i;
2297 unsigned layer_coord = 0;
2298 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2300 memset(&params, 0, sizeof(params));
/* Bail out with undefined texels if no sampler generator was supplied. */
2302 if (!bld->sampler) {
2303 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2304 for (i = 0; i < 4; i++) {
2305 texel[i] = bld->bld_base.base.undef;
2311 * unlike old-style tex opcodes the texture/sampler indices
2312 * always come from src1 and src2 respectively.
2314 texture_unit = inst->Src[1].Register.Index;
2315 sampler_unit = inst->Src[2].Register.Index;
2318 * Note inst->Texture.Texture will contain the number of offsets,
2319 * however the target information is NOT there and comes from the
2320 * declared sampler views instead.
2322 switch (bld->sv[texture_unit].Resource) {
2323 case TGSI_TEXTURE_1D:
2327 case TGSI_TEXTURE_1D_ARRAY:
2332 case TGSI_TEXTURE_2D:
2333 case TGSI_TEXTURE_RECT:
2337 case TGSI_TEXTURE_2D_ARRAY:
2342 case TGSI_TEXTURE_CUBE:
2346 case TGSI_TEXTURE_3D:
2350 case TGSI_TEXTURE_CUBE_ARRAY:
/* Lod bias (SAMPLE_B) / explicit lod (SAMPLE_L) always come from src3.x. */
2360 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2361 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2362 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2363 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2364 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2366 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2367 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2369 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2371 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2372 /* XXX might be better to explicitly pass the level zero information */
2373 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2374 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2377 for (i = 0; i < num_derivs; i++) {
2378 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2380 for (i = num_derivs; i < 5; i++) {
2381 coords[i] = bld->bld_base.base.undef;
2384 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2386 if (layer_coord == 3)
2387 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2389 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2391 /* Shadow coord occupies always 5th slot. */
2393 sample_key |= LP_SAMPLER_SHADOW;
2394 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
/* Explicit derivatives (SAMPLE_D) come from src3 (ddx) and src4 (ddy). */
2397 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2399 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2400 for (dim = 0; dim < num_derivs; ++dim) {
2401 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2402 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2404 params.derivs = &derivs;
2406 * could also check all src regs if constant but I doubt such
2407 * cases exist in practice.
2409 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2410 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2411 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2414 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2418 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2422 /* some advanced gather instructions (txgo) would require 4 offsets */
2423 if (inst->Texture.NumOffsets == 1) {
2425 sample_key |= LP_SAMPLER_OFFSETS;
2426 for (dim = 0; dim < num_offsets; dim++) {
2427 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2430 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2432 params.type = bld->bld_base.base.type;
2433 params.sample_key = sample_key;
2434 params.texture_index = texture_unit;
2435 params.sampler_index = sampler_unit;
2436 params.context_ptr = bld->context_ptr;
2437 params.thread_data_ptr = bld->thread_data_ptr;
2438 params.coords = coords;
2439 params.offsets = offsets;
2441 params.texel = texel;
2443 bld->sampler->emit_tex_sample(bld->sampler,
2444 bld->bld_base.base.gallivm,
/* Apply the sampler-view return swizzle (from src1) to the result. */
2447 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2448 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2449 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2450 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2451 unsigned char swizzles[4];
2452 swizzles[0] = inst->Src[1].Register.SwizzleX;
2453 swizzles[1] = inst->Src[1].Register.SwizzleY;
2454 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2455 swizzles[3] = inst->Src[1].Register.SwizzleW;
2457 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
/*
 * Emit code for TXF / SAMPLE_I style texel fetches (integer coords,
 * explicit lod, no filtering).  The texture unit comes from src1.
 * Fix: "&params" had been mangled into the mojibake "¶ms" by an
 * HTML-entity round-trip; restored the correct token.
 * NOTE(review): this chunk is an elided view; some statements between the
 * visible lines are missing from this excerpt.
 */
2462 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2463 const struct tgsi_full_instruction *inst,
2464 LLVMValueRef *texel,
2467 unsigned unit, target;
2468 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2469 LLVMValueRef explicit_lod = NULL;
2470 LLVMValueRef coords[5];
2471 LLVMValueRef offsets[3] = { NULL };
2472 LLVMValueRef ms_index = NULL;
2473 struct lp_sampler_params params;
2474 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2476 unsigned layer_coord = 0;
2477 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2479 memset(&params, 0, sizeof(params));
/* Bail out with undefined texels if no sampler generator was supplied. */
2481 if (!bld->sampler) {
2482 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2483 for (i = 0; i < 4; i++) {
2484 texel[i] = coord_undef;
2489 unit = inst->Src[1].Register.Index;
/* Target comes from the declared sampler view, or the instruction itself. */
2492 target = bld->sv[unit].Resource;
2495 target = inst->Texture.Texture;
2499 case TGSI_TEXTURE_1D:
2500 case TGSI_TEXTURE_BUFFER:
2503 case TGSI_TEXTURE_1D_ARRAY:
2507 case TGSI_TEXTURE_2D:
2508 case TGSI_TEXTURE_RECT:
2509 case TGSI_TEXTURE_2D_MSAA:
2512 case TGSI_TEXTURE_2D_ARRAY:
2513 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2517 case TGSI_TEXTURE_3D:
2525 /* always have lod except for buffers and msaa targets ? */
2526 if (target != TGSI_TEXTURE_BUFFER &&
2527 target != TGSI_TEXTURE_2D_MSAA &&
2528 target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2529 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2530 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2531 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2532 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
/* For MSAA targets the .w component carries the sample index instead. */
2535 if (target == TGSI_TEXTURE_2D_MSAA ||
2536 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2537 sample_key |= LP_SAMPLER_FETCH_MS;
2538 ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2542 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2543 * would be the sample index.
2546 for (i = 0; i < dims; i++) {
2547 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2549 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2550 for (i = dims; i < 5; i++) {
2551 coords[i] = coord_undef;
2554 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2556 if (inst->Texture.NumOffsets == 1) {
2558 sample_key |= LP_SAMPLER_OFFSETS;
2559 for (dim = 0; dim < dims; dim++) {
2560 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2563 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2565 params.type = bld->bld_base.base.type;
2566 params.sample_key = sample_key;
2567 params.texture_index = unit;
2569 * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2570 * and trigger some assertions with d3d10 where the sampler view number
2573 params.sampler_index = 0;
2574 params.context_ptr = bld->context_ptr;
2575 params.thread_data_ptr = bld->thread_data_ptr;
2576 params.coords = coords;
2577 params.offsets = offsets;
2578 params.derivs = NULL;
2579 params.lod = explicit_lod;
2580 params.texel = texel;
2581 params.ms_index = ms_index;
2583 bld->sampler->emit_tex_sample(bld->sampler,
2584 bld->bld_base.base.gallivm,
/* Apply the sampler-view return swizzle (from src1) to the result. */
2588 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2589 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2590 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2591 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2592 unsigned char swizzles[4];
2593 swizzles[0] = inst->Src[1].Register.SwizzleX;
2594 swizzles[1] = inst->Src[1].Register.SwizzleY;
2595 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2596 swizzles[3] = inst->Src[1].Register.SwizzleW;
2598 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
/*
 * Emit code for TXQ / SVIEWINFO texture size queries.  The texture unit
 * comes from src1; the (optional) lod comes from src0.x.
 * Results (width/height/depth/levels as applicable) go to sizes_out[].
 */
2603 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2604 const struct tgsi_full_instruction *inst,
2605 LLVMValueRef *sizes_out,
2606 boolean is_sviewinfo)
2608 LLVMValueRef explicit_lod;
2609 enum lp_sampler_lod_property lod_property;
2612 unsigned unit = inst->Src[1].Register.Index;
2613 enum tgsi_texture_type target;
2614 enum pipe_texture_target pipe_target;
2615 struct lp_sampler_size_query_params params;
/* Target comes from the declared sampler view, or the instruction itself. */
2618 target = bld->sv[unit].Resource;
2621 target = inst->Texture.Texture;
2624 case TGSI_TEXTURE_BUFFER:
2625 case TGSI_TEXTURE_RECT:
2626 case TGSI_TEXTURE_SHADOWRECT:
2634 if (!bld->sampler) {
2635 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2636 for (i = 0; i < 4; i++)
2637 sizes_out[i] = bld->bld_base.int_bld.undef;
2642 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2643 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2646 explicit_lod = NULL;
2647 lod_property = LP_SAMPLER_LOD_SCALAR;
2651 pipe_target = tgsi_to_pipe_tex_target(target);
2653 params.int_type = bld->bld_base.int_bld.type;
2654 params.texture_unit = unit;
2655 params.target = pipe_target;
2656 params.context_ptr = bld->context_ptr;
/* NOTE(review): always TRUE, the is_sviewinfo parameter is not used here —
 * confirm this is intentional. */
2657 params.is_sviewinfo = TRUE;
2658 params.lod_property = lod_property;
2659 params.explicit_lod = explicit_lod;
2660 params.sizes_out = sizes_out;
2661 params.samples_only = false;
2663 bld->sampler->emit_size_query(bld->sampler,
2664 bld->bld_base.base.gallivm,
/*
 * Heuristic: look at the next few (up to 5) instructions after pc and
 * decide whether we are close enough to the end of the shader that a
 * mask check after a kill can be skipped.  Any instruction that samples,
 * queries, or changes control flow disqualifies the window.
 */
2669 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2674 for (i = 0; i < 5; i++) {
2675 enum tgsi_opcode opcode;
2677 if (pc + i >= bld->bld_base.info->num_instructions)
2680 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2682 if (opcode == TGSI_OPCODE_END)
/* Texture/query ops and control flow make skipping the mask check unsafe. */
2685 if (opcode == TGSI_OPCODE_TEX ||
2686 opcode == TGSI_OPCODE_TXP ||
2687 opcode == TGSI_OPCODE_TXD ||
2688 opcode == TGSI_OPCODE_TXB ||
2689 opcode == TGSI_OPCODE_TXL ||
2690 opcode == TGSI_OPCODE_TXF ||
2691 opcode == TGSI_OPCODE_TXQ ||
2692 opcode == TGSI_OPCODE_TEX2 ||
2693 opcode == TGSI_OPCODE_TXB2 ||
2694 opcode == TGSI_OPCODE_TXL2 ||
2695 opcode == TGSI_OPCODE_SAMPLE ||
2696 opcode == TGSI_OPCODE_SAMPLE_B ||
2697 opcode == TGSI_OPCODE_SAMPLE_C ||
2698 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2699 opcode == TGSI_OPCODE_SAMPLE_D ||
2700 opcode == TGSI_OPCODE_SAMPLE_I ||
2701 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2702 opcode == TGSI_OPCODE_SAMPLE_L ||
2703 opcode == TGSI_OPCODE_SVIEWINFO ||
2704 opcode == TGSI_OPCODE_CAL ||
2705 opcode == TGSI_OPCODE_IF ||
2706 opcode == TGSI_OPCODE_UIF ||
2707 opcode == TGSI_OPCODE_BGNLOOP ||
2708 opcode == TGSI_OPCODE_SWITCH)
2718 * Kill fragment if any of the src register values are negative.
2722 struct lp_build_tgsi_soa_context *bld,
2723 const struct tgsi_full_instruction *inst,
2726 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2727 const struct tgsi_full_src_register *reg = &inst->Src[0];
2728 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2730 unsigned chan_index;
2732 memset(&terms, 0, sizeof terms);
/* Fetch each referenced source channel once, deduped by swizzle. */
2734 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2737 /* Unswizzle channel */
2738 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2740 /* Check if the component has not been already tested. */
2741 assert(swizzle < TGSI_NUM_CHANNELS);
2742 if( !terms[swizzle] )
2743 /* TODO: change the comparison operator instead of setting the sign */
2744 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
/* AND together the per-channel (term >= 0) masks. */
2748 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2749 if(terms[chan_index]) {
2750 LLVMValueRef chan_mask;
2753 * If term < 0 then mask = 0 else mask = ~0.
2755 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2758 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
/* Lanes already dead (outside exec mask) must not be re-killed. */
2764 if (bld->exec_mask.has_mask) {
2765 LLVMValueRef invmask;
2766 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2767 mask = LLVMBuildOr(builder, mask, invmask, "");
2770 lp_build_mask_update(bld->mask, mask);
/* Skip the early-out mask check if the shader is about to end anyway. */
2771 if (!near_end_of_shader(bld, pc))
2772 lp_build_mask_check(bld->mask);
2777 * Unconditional fragment kill.
2778 * The only predication is the execution mask which will apply if
2779 * we're inside a loop or conditional.
2782 emit_kill(struct lp_build_tgsi_soa_context *bld,
2785 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2788 /* For those channels which are "alive", disable fragment shader
2791 if (bld->exec_mask.has_mask) {
/* Kill only the currently-active lanes: new mask = ~exec_mask. */
2792 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2795 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2799 lp_build_mask_update(bld->mask, mask);
/* Skip the early-out mask check if the shader is about to end anyway. */
2801 if (!near_end_of_shader(bld, pc))
2802 lp_build_mask_check(bld->mask);
2807 * Emit code which will dump the value of all the temporary registers
/*
 * Debug helper: emit code that prints the run-time value of every declared
 * register in the given file (inputs, constants, temps or outputs).
 * Fix: "&reg" had been mangled into the mojibake "®" (twice) by an
 * HTML-entity round-trip; restored the correct token.
 */
2811 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2814 const struct tgsi_shader_info *info = bld->bld_base.info;
2815 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2816 LLVMBuilderRef builder = gallivm->builder;
2817 LLVMValueRef reg_ptr;
2819 int max_index = info->file_max[file];
2822 * Some register files, particularly constants, can be very large,
2823 * and dumping everything could make this unusably slow.
2825 max_index = MIN2(max_index, 32);
2827 for (index = 0; index <= max_index; index++) {
2832 if (index < 8 * sizeof(unsigned) &&
2833 (info->file_mask[file] & (1u << index)) == 0) {
2834 /* This was not declared.*/
2838 if (file == TGSI_FILE_INPUT) {
2839 mask = info->input_usage_mask[index];
2841 mask = TGSI_WRITEMASK_XYZW;
2844 for (chan = 0; chan < 4; chan++) {
2845 if ((mask & (1 << chan)) == 0) {
2846 /* This channel is not used.*/
/* Constants are fetched through the regular fetch path with an
 * identity swizzle. */
2850 if (file == TGSI_FILE_CONSTANT) {
2851 struct tgsi_full_src_register reg;
2852 memset(&reg, 0, sizeof reg);
2853 reg.Register.File = file;
2854 reg.Register.Index = index;
2855 reg.Register.SwizzleX = 0;
2856 reg.Register.SwizzleY = 1;
2857 reg.Register.SwizzleZ = 2;
2858 reg.Register.SwizzleW = 3;
2860 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2864 } else if (file == TGSI_FILE_INPUT) {
2865 res = bld->inputs[index][chan];
2869 } else if (file == TGSI_FILE_TEMPORARY) {
2870 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2872 res = LLVMBuildLoad(builder, reg_ptr, "");
2873 } else if (file == TGSI_FILE_OUTPUT) {
2874 reg_ptr = lp_get_output_ptr(bld, index, chan);
2876 res = LLVMBuildLoad(builder, reg_ptr, "");
2882 emit_dump_reg(gallivm, file, index, chan, res);
/*
 * Handle a TGSI declaration: allocate storage (allocas) for temporaries,
 * outputs and address registers, record sampler-view targets, and resolve
 * base pointers/sizes for constant and shader buffers.
 */
2890 lp_emit_declaration_soa(
2891 struct lp_build_tgsi_context *bld_base,
2892 const struct tgsi_full_declaration *decl)
2894 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2895 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2896 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2897 const unsigned first = decl->Range.First;
2898 const unsigned last = decl->Range.Last;
2901 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2903 switch (decl->Declaration.File) {
2904 case TGSI_FILE_TEMPORARY:
/* Only allocate per-register allocas when not indirectly addressed;
 * indirect files use one big array elsewhere. */
2905 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2906 assert(last < LP_MAX_INLINED_TEMPS);
2907 for (idx = first; idx <= last; ++idx) {
2908 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2909 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2914 case TGSI_FILE_OUTPUT:
2915 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2916 for (idx = first; idx <= last; ++idx) {
2917 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2918 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2919 vec_type, "output");
2924 case TGSI_FILE_ADDRESS:
2925 /* ADDR registers are only allocated with an integer LLVM IR type,
2926 * as they are guaranteed to always have integers.
2927 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2928 * an ADDR register for that matter).
2930 assert(last < LP_MAX_TGSI_ADDRS);
2931 for (idx = first; idx <= last; ++idx) {
2932 assert(idx < LP_MAX_TGSI_ADDRS);
2933 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2934 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2938 case TGSI_FILE_SAMPLER_VIEW:
2940 * The target stored here MUST match whatever there actually
2941 * is in the set sampler views (what about return type?).
2943 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2944 for (idx = first; idx <= last; ++idx) {
2945 bld->sv[idx] = decl->SamplerView;
2949 case TGSI_FILE_CONSTANT:
2952 * We could trivially fetch the per-buffer pointer when fetching the
2953 * constant, relying on llvm to figure out it's always the same pointer
2954 * anyway. However, doing so results in a huge (more than factor of 10)
2955 * slowdown in llvm compilation times for some (but not all) shaders
2956 * (more specifically, the IR optimization spends way more time in
2957 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2959 unsigned idx2D = decl->Dim.Index2D;
2960 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2961 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2962 bld->consts[idx2D] = lp_llvm_buffer_base(gallivm, bld->consts_ptr,
2963 index2D, LP_MAX_TGSI_CONST_BUFFERS);
2964 bld->consts[idx2D] = LLVMBuildBitCast(gallivm->builder, bld->consts[idx2D], LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0), "");
2965 bld->consts_sizes[idx2D] = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr,
2966 index2D, LP_MAX_TGSI_CONST_BUFFERS);
2969 case TGSI_FILE_BUFFER:
2971 unsigned idx = decl->Range.First;
2972 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2973 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2975 lp_llvm_buffer_base(gallivm, bld->ssbo_ptr,
2976 index, LP_MAX_TGSI_SHADER_BUFFERS);
2977 bld->ssbo_sizes[idx] =
2978 lp_llvm_buffer_num_elements(gallivm, bld->ssbo_ptr,
2979 index, LP_MAX_TGSI_SHADER_BUFFERS);
2983 case TGSI_FILE_MEMORY:
2986 /* don't need to declare other vars */
/*
 * Register a TGSI immediate: materialize up to four constant vectors
 * (bitcast to the float vec type for integer/double data) and store them
 * either into the inlined immediates[] array or, for indirectly addressed
 * immediates, into the imms_array alloca.
 */
2992 void lp_emit_immediate_soa(
2993 struct lp_build_tgsi_context *bld_base,
2994 const struct tgsi_full_immediate *imm)
2996 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2997 struct gallivm_state * gallivm = bld_base->base.gallivm;
2998 LLVMValueRef imms[4];
3000 const uint size = imm->Immediate.NrTokens - 1;
3002 switch (imm->Immediate.DataType) {
3003 case TGSI_IMM_FLOAT32:
3004 for( i = 0; i < size; ++i )
3006 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3009 case TGSI_IMM_FLOAT64:
3010 case TGSI_IMM_UINT64:
3011 case TGSI_IMM_INT64:
3012 case TGSI_IMM_UINT32:
/* Non-float32 data is built as uint/int vectors then bitcast to the
 * float vec type, preserving the raw bits. */
3013 for( i = 0; i < size; ++i ) {
3014 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3015 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3019 case TGSI_IMM_INT32:
3020 for( i = 0; i < size; ++i ) {
3021 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3022 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
/* Pad unused channels with undef. */
3027 for( i = size; i < 4; ++i )
3028 imms[i] = bld_base->base.undef;
3030 if (bld->use_immediates_array) {
3031 unsigned index = bld->num_immediates;
3032 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3033 LLVMBuilderRef builder = gallivm->builder;
3034 LLVMValueRef gep[2];
3035 gep[0] = lp_build_const_int32(gallivm, 0);
3037 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3038 for (i = 0; i < 4; ++i ) {
3039 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3040 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3041 bld->imms_array, gep, 2, "");
3042 LLVMBuildStore(builder, imms[i], imm_ptr);
3045 /* simply copy the immediate values into the next immediates[] slot */
3047 assert(imm->Immediate.NrTokens - 1 <= 4);
3048 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3050 for(i = 0; i < 4; ++i )
3051 bld->immediates[bld->num_immediates][i] = imms[i];
/* Mirror into the imms_array as well when indirect addressing is used. */
3053 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3054 unsigned index = bld->num_immediates;
3055 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3056 LLVMBuilderRef builder = gallivm->builder;
3057 LLVMValueRef gep[2];
3058 gep[0] = lp_build_const_int32(gallivm, 0);
3059 for (i = 0; i < 4; ++i ) {
3060 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3061 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3062 bld->imms_array, gep, 2, "");
3063 LLVMBuildStore(builder,
3064 bld->immediates[index][i],
3070 bld->num_immediates++;
/* TGSI_OPCODE_DDX: forward only the ddx output of emit_fetch_deriv. */
3075 const struct lp_build_tgsi_action * action,
3076 struct lp_build_tgsi_context * bld_base,
3077 struct lp_build_emit_data * emit_data)
3079 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3081 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3082 &emit_data->output[emit_data->chan], NULL);
/* TGSI_OPCODE_DDY: forward only the ddy output of emit_fetch_deriv. */
3087 const struct lp_build_tgsi_action * action,
3088 struct lp_build_tgsi_context * bld_base,
3089 struct lp_build_emit_data * emit_data)
3091 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3093 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3094 &emit_data->output[emit_data->chan]);
/* TGSI_OPCODE_KILL: unconditional fragment kill. */
3099 const struct lp_build_tgsi_action * action,
3100 struct lp_build_tgsi_context * bld_base,
3101 struct lp_build_emit_data * emit_data)
3103 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3105 emit_kill(bld, bld_base->pc - 1);
/* TGSI_OPCODE_KILL_IF: kill fragment if any src channel is negative. */
3110 const struct lp_build_tgsi_action * action,
3111 struct lp_build_tgsi_context * bld_base,
3112 struct lp_build_emit_data * emit_data)
3114 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3116 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
/* TGSI_OPCODE_TEX: plain texture sample, one coord reg, no modifier. */
3121 const struct lp_build_tgsi_action * action,
3122 struct lp_build_tgsi_context * bld_base,
3123 struct lp_build_emit_data * emit_data)
3125 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3127 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3128 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TEX2: texture sample with coords split over two src regs. */
3133 const struct lp_build_tgsi_action * action,
3134 struct lp_build_tgsi_context * bld_base,
3135 struct lp_build_emit_data * emit_data)
3137 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3139 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3140 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TXB: texture sample with lod bias. */
3145 const struct lp_build_tgsi_action * action,
3146 struct lp_build_tgsi_context * bld_base,
3147 struct lp_build_emit_data * emit_data)
3149 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3151 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3152 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TXB2: lod-bias sample, coords split over two src regs. */
3157 const struct lp_build_tgsi_action * action,
3158 struct lp_build_tgsi_context * bld_base,
3159 struct lp_build_emit_data * emit_data)
3161 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3163 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3164 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TXD: texture sample with explicit derivatives (3 src regs). */
3169 const struct lp_build_tgsi_action * action,
3170 struct lp_build_tgsi_context * bld_base,
3171 struct lp_build_emit_data * emit_data)
3173 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3175 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3176 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TXL: texture sample with explicit lod. */
3181 const struct lp_build_tgsi_action * action,
3182 struct lp_build_tgsi_context * bld_base,
3183 struct lp_build_emit_data * emit_data)
3185 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3187 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3188 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TXL2: explicit-lod sample, coords split over two src regs. */
3193 const struct lp_build_tgsi_action * action,
3194 struct lp_build_tgsi_context * bld_base,
3195 struct lp_build_emit_data * emit_data)
3197 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3199 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3200 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TXP: projective texture sample (coords divided by .w). */
3205 const struct lp_build_tgsi_action * action,
3206 struct lp_build_tgsi_context * bld_base,
3207 struct lp_build_emit_data * emit_data)
3209 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3211 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3212 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
/* TGSI_OPCODE_TG4: texture gather (sampler comes from the second src). */
3217 const struct lp_build_tgsi_action * action,
3218 struct lp_build_tgsi_context * bld_base,
3219 struct lp_build_emit_data * emit_data)
3221 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3223 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3224 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
/* TGSI_OPCODE_LODQ: query the lod that would be used for a sample. */
3229 const struct lp_build_tgsi_action * action,
3230 struct lp_build_tgsi_context * bld_base,
3231 struct lp_build_emit_data * emit_data)
3233 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3235 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3236 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
/* TGSI_OPCODE_TXQ: texture size query. */
3241 const struct lp_build_tgsi_action * action,
3242 struct lp_build_tgsi_context * bld_base,
3243 struct lp_build_emit_data * emit_data)
3245 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3247 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
/* TGSI_OPCODE_TXF: unfiltered texel fetch with integer coords. */
3252 const struct lp_build_tgsi_action * action,
3253 struct lp_build_tgsi_context * bld_base,
3254 struct lp_build_emit_data * emit_data)
3256 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3258 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
/* TGSI_OPCODE_SAMPLE_I: SM4-style texel fetch (TRUE selects sample_i path). */
3263 const struct lp_build_tgsi_action * action,
3264 struct lp_build_tgsi_context * bld_base,
3265 struct lp_build_emit_data * emit_data)
3267 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3269 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
/* TGSI_OPCODE_SAMPLE: SM4-style sample, no modifier, no compare. */
3274 const struct lp_build_tgsi_action * action,
3275 struct lp_build_tgsi_context * bld_base,
3276 struct lp_build_emit_data * emit_data)
3278 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3280 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3281 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
/* TGSI_OPCODE_SAMPLE_B: SM4-style sample with lod bias. */
3286 const struct lp_build_tgsi_action * action,
3287 struct lp_build_tgsi_context * bld_base,
3288 struct lp_build_emit_data * emit_data)
3290 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3292 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3293 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
/* TGSI_OPCODE_SAMPLE_C: SM4-style shadow-compare sample (TRUE = compare). */
3298 const struct lp_build_tgsi_action * action,
3299 struct lp_build_tgsi_context * bld_base,
3300 struct lp_build_emit_data * emit_data)
3302 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3304 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3305 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
/* TGSI_OPCODE_SAMPLE_C_LZ: shadow-compare sample forced to level zero. */
3310 const struct lp_build_tgsi_action * action,
3311 struct lp_build_tgsi_context * bld_base,
3312 struct lp_build_emit_data * emit_data)
3314 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3316 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3317 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
/* TGSI_OPCODE_SAMPLE_D: SM4-style sample with explicit derivatives. */
3322 const struct lp_build_tgsi_action * action,
3323 struct lp_build_tgsi_context * bld_base,
3324 struct lp_build_emit_data * emit_data)
3326 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3328 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3329 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
/* TGSI_OPCODE_SAMPLE_L: SM4-style sample with explicit lod. */
3334 const struct lp_build_tgsi_action * action,
3335 struct lp_build_tgsi_context * bld_base,
3336 struct lp_build_emit_data * emit_data)
3338 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3340 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3341 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
/* SM4-style gather4: four-texel gather through the emit_sample path. */
3346 const struct lp_build_tgsi_action * action,
3347 struct lp_build_tgsi_context * bld_base,
3348 struct lp_build_emit_data * emit_data)
3350 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3352 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3353 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
/* TGSI_OPCODE_SVIEWINFO: sampler-view size/levels query. */
3358 const struct lp_build_tgsi_action * action,
3359 struct lp_build_tgsi_context * bld_base,
3360 struct lp_build_emit_data * emit_data)
3362 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3364 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
/* SM4-style LOD query through the emit_sample path. */
3369 const struct lp_build_tgsi_action * action,
3370 struct lp_build_tgsi_context * bld_base,
3371 struct lp_build_emit_data * emit_data)
3373 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3375 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3376 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
/*
 * Map a TGSI texture target to its coordinate dimension count and,
 * for array targets, the coordinate slot holding the layer index.
 */
3380 target_to_dims_layer(enum tgsi_texture_type target,
3382 unsigned *layer_coord)
3386 case TGSI_TEXTURE_1D:
3387 case TGSI_TEXTURE_BUFFER:
3390 case TGSI_TEXTURE_1D_ARRAY:
3394 case TGSI_TEXTURE_2D:
3395 case TGSI_TEXTURE_RECT:
3398 case TGSI_TEXTURE_2D_ARRAY:
3402 case TGSI_TEXTURE_3D:
3403 case TGSI_TEXTURE_CUBE:
3404 case TGSI_TEXTURE_CUBE_ARRAY:
/*
 * Emit an image LOAD: fetch coords from src1, fill out lp_img_params and
 * dispatch to the image backend.  The image unit comes from src0.
 * Fix: "&params" had been mangled into the mojibake "¶ms" by an
 * HTML-entity round-trip; restored the correct token.
 */
3416 const struct lp_build_tgsi_action * action,
3417 struct lp_build_tgsi_context * bld_base,
3418 struct lp_build_emit_data * emit_data)
3420 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3421 struct lp_img_params params;
3422 LLVMValueRef coords[5];
3423 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3425 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3426 unsigned layer_coord;
3428 target_to_dims_layer(target, &dims, &layer_coord);
3430 for (unsigned i = 0; i < dims; i++) {
3431 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
/* Pad remaining coord slots with undef. */
3433 for (unsigned i = dims; i < 5; i++) {
3434 coords[i] = coord_undef;
3437 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3439 memset(&params, 0, sizeof(params));
3441 params.type = bld->bld_base.base.type;
3442 params.context_ptr = bld->context_ptr;
3443 params.thread_data_ptr = bld->thread_data_ptr;
3444 params.coords = coords;
3445 params.outdata = emit_data->output;
3446 params.target = tgsi_to_pipe_tex_target(target);
3447 params.image_index = emit_data->inst->Src[0].Register.Index;
3448 params.img_op = LP_IMG_LOAD;
3449 bld->image->emit_op(bld->image,
3450 bld->bld_base.base.gallivm,
/* TGSI LOAD opcode: dispatches on the resource file of Src[0].
 *  - IMAGE    -> img_load_emit()
 *  - CONSTBUF -> gathered fetch from the constant buffer with per-channel
 *                overflow masking (out-of-bounds reads masked off)
 *  - BUFFER / MEMORY (SSBO or shared memory) -> scalarized per-lane load
 *                loop, guarded by the execution mask and the SSBO size
 *                limit; inactive or out-of-bounds lanes produce 0.
 * NOTE(review): function name line, braces, an 'else' keyword and a few
 * statements are missing from this extraction.
 */
3456 const struct lp_build_tgsi_action * action,
3457 struct lp_build_tgsi_context * bld_base,
3458 struct lp_build_emit_data * emit_data)
3460 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3461 struct gallivm_state * gallivm = bld_base->base.gallivm;
3462 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3463 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3464 unsigned buf = bufreg->Register.Index;
3465 assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3466 bufreg->Register.File == TGSI_FILE_IMAGE ||
3467 bufreg->Register.File == TGSI_FILE_MEMORY ||
3468 bufreg->Register.File == TGSI_FILE_CONSTBUF);
3469 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3470 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3472 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3473 img_load_emit(action, bld_base, emit_data);
3474 } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3475 LLVMValueRef consts_ptr = bld->consts[buf];
3476 LLVMValueRef num_consts = bld->consts_sizes[buf];
3478 LLVMValueRef indirect_index;
3479 LLVMValueRef overflow_mask;
/* Byte offset -> vec4 index (divide by 16). */
3481 indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3482 indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3484 /* All fetches are from the same constant buffer, so
3485 * we need to propagate the size to a vector to do a
3486 * vector comparison */
3487 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3489 /* Gather values from the constant buffer */
3490 unsigned chan_index;
3491 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3492 /* Construct a boolean vector telling us which channels
3493 * overflow the bound constant buffer */
3494 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3495 indirect_index, num_consts);
3497 /* index_vec = indirect_index * 4 */
3498 LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3499 index_vec = lp_build_add(uint_bld, index_vec,
3500 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3502 emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3505 /* for indirect support with ARB_gpu_shader5 */
/* SSBO / shared-memory path: byte offset -> dword index (>> 2). */
3508 LLVMValueRef scalar, scalar_ptr;
3509 unsigned chan_index;
3511 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3512 index = lp_build_shr_imm(uint_bld, index, 2);
3514 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
/* SSBO size in dwords, broadcast for a vector bounds compare (only for
 * the non-shared case -- the guarding condition is missing here). */
3516 LLVMValueRef ssbo_limit = NULL;
3519 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3520 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3523 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3524 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3526 LLVMValueRef exec_mask = mask_vec(bld_base);
/* Also mask off lanes whose index is past the SSBO size. */
3528 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3529 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
/* Per-lane scalarization loop: load one element per active lane,
 * insert into the result vector; inactive lanes get 0. */
3532 LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3533 struct lp_build_loop_state loop_state;
3534 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3536 struct lp_build_if_state ifthen;
3537 LLVMValueRef cond, temp_res;
3539 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3540 loop_state.counter, "");
3542 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3543 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3545 lp_build_if(&ifthen, gallivm, cond);
3546 scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3548 temp_res = LLVMBuildLoad(builder, result, "");
3549 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3550 LLVMBuildStore(builder, temp_res, result);
3551 lp_build_else(&ifthen);
3552 temp_res = LLVMBuildLoad(builder, result, "");
3553 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3554 LLVMBuildStore(builder, temp_res, result);
3555 lp_build_endif(&ifthen);
3556 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3558 emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
/* Emit an image STORE: coordinates come from source operand 0, the four
 * data channels from source operand 1; the write is predicated on the
 * current execution mask and dispatched via bld->image->emit_op() with
 * LP_IMG_STORE.  outdata is NULL since a store produces no result.
 * NOTE(review): function name line and braces are missing from this
 * extraction; '¶ms' below is mojibake for '&params'.
 */
3565 const struct lp_build_tgsi_action * action,
3566 struct lp_build_tgsi_context * bld_base,
3567 struct lp_build_emit_data * emit_data)
3569 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3570 struct lp_img_params params;
3571 LLVMValueRef coords[5];
3572 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3574 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3575 unsigned layer_coord;
3577 target_to_dims_layer(target, &dims, &layer_coord);
3578 for (unsigned i = 0; i < dims; i++) {
3579 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3581 for (unsigned i = dims; i < 5; i++) {
3582 coords[i] = coord_undef;
3585 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3586 memset(¶ms, 0, sizeof(params));
3588 params.type = bld->bld_base.base.type;
3589 params.context_ptr = bld->context_ptr;
3590 params.thread_data_ptr = bld->thread_data_ptr;
3591 params.coords = coords;
3592 params.outdata = NULL;
3593 params.exec_mask = mask_vec(bld_base);
3594 params.target = tgsi_to_pipe_tex_target(target);
3595 params.image_index = emit_data->inst->Dst[0].Register.Index;
3596 params.img_op = LP_IMG_STORE;
/* The value to store: all four channels of src operand 1. */
3597 for (unsigned i = 0; i < 4; i++)
3598 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3600 bld->image->emit_op(bld->image,
3601 bld->bld_base.base.gallivm,
/* TGSI STORE opcode: dispatches on the resource file of Dst[0].
 * IMAGE -> img_store_emit(); BUFFER/MEMORY -> scalarized per-lane store
 * loop guarded by the exec mask and the SSBO size limit.  Mirrors
 * load_emit() above but writing instead of reading.
 * NOTE(review): function name line, braces and some statements are missing
 * from this extraction.
 */
3607 const struct lp_build_tgsi_action * action,
3608 struct lp_build_tgsi_context * bld_base,
3609 struct lp_build_emit_data * emit_data)
3611 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3612 struct gallivm_state * gallivm = bld_base->base.gallivm;
3613 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3614 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3615 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3616 unsigned buf = bufreg->Register.Index;
3617 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3618 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3620 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3621 img_store_emit(action, bld_base, emit_data);
3625 LLVMValueRef index; /* index into the const buffer */
3626 LLVMValueRef scalar_ptr;
3628 unsigned chan_index;
/* Byte offset from dst operand 0 -> dword index (>> 2). */
3630 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3631 index = lp_build_shr_imm(uint_bld, index, 2);
3633 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3635 LLVMValueRef ssbo_limit = NULL;
3638 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3639 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3642 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3643 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
/* Value to store comes from src operand 1, same channel. */
3645 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3647 LLVMValueRef exec_mask = mask_vec(bld_base);
3649 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3650 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
/* Per-lane scalarization loop: store one element per active lane. */
3653 struct lp_build_loop_state loop_state;
3654 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3656 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3657 loop_state.counter, "");
3658 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3660 struct lp_build_if_state ifthen;
3663 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3664 loop_state.counter, "");
3666 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3667 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3668 lp_build_if(&ifthen, gallivm, cond);
3670 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3672 lp_build_endif(&ifthen);
3673 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
/* TGSI RESQ opcode (resource size query): for IMAGE resources delegate to
 * the image backend's emit_size_query(); for BUFFER resources broadcast
 * the stored SSBO size into the destination channel.
 * NOTE(review): function name line, braces and the else keyword are
 * missing from this extraction.
 */
3681 const struct lp_build_tgsi_action * action,
3682 struct lp_build_tgsi_context * bld_base,
3683 struct lp_build_emit_data * emit_data)
3685 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3686 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3687 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3689 unsigned buf = bufreg->Register.Index;
3690 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3692 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3693 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3694 struct lp_sampler_size_query_params params = { 0 };
3695 params.int_type = bld->bld_base.int_bld.type;
3696 params.texture_unit = buf;
3697 params.target = tgsi_to_pipe_tex_target(target);
3698 params.context_ptr = bld->context_ptr;
3699 params.sizes_out = emit_data->output;
3701 bld->image->emit_size_query(bld->image,
3702 bld->bld_base.base.gallivm,
/* BUFFER: size in bytes, broadcast to every lane. */
3705 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3707 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
/* Emit an image atomic op: coordinates from src operand 1, operand data
 * from src operand 2 (and comparison data from operand 3 for ATOMCAS).
 * The op is predicated on the exec mask and dispatched via
 * bld->image->emit_op() as LP_IMG_ATOMIC or LP_IMG_ATOMIC_CAS; the old
 * value lands in emit_data->output.
 * NOTE(review): function name line and braces are missing from this
 * extraction; '¶ms' below is mojibake for '&params'; the 'op' parameter
 * is presumably stored into params between lines 3744 and 3746.
 */
3713 const struct lp_build_tgsi_action * action,
3714 struct lp_build_tgsi_context * bld_base,
3715 struct lp_build_emit_data * emit_data,
3716 LLVMAtomicRMWBinOp op)
3718 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3719 struct lp_img_params params;
3720 LLVMValueRef coords[5];
3721 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3723 unsigned layer_coord;
3724 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3726 target_to_dims_layer(target, &dims, &layer_coord);
3728 for (unsigned i = 0; i < dims; i++) {
3729 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3731 for (unsigned i = dims; i < 5; i++) {
3732 coords[i] = coord_undef;
3735 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3736 memset(¶ms, 0, sizeof(params));
3738 params.type = bld->bld_base.base.type;
3739 params.context_ptr = bld->context_ptr;
3740 params.thread_data_ptr = bld->thread_data_ptr;
3741 params.exec_mask = mask_vec(bld_base);
3742 params.image_index = emit_data->inst->Src[0].Register.Index;
3743 params.coords = coords;
3744 params.target = tgsi_to_pipe_tex_target(target);
3746 params.outdata = emit_data->output;
3747 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3749 for (unsigned i = 0; i < 4; i++)
3750 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
/* CAS carries a second operand: the value to swap in. */
3751 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3752 for (unsigned i = 0; i < 4; i++)
3753 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3755 bld->image->emit_op(bld->image,
3756 bld->bld_base.base.gallivm,
/* TGSI ATOM* opcodes: map the TGSI atomic opcode to an LLVMAtomicRMWBinOp,
 * then dispatch -- IMAGE resources go to img_atomic_emit(); BUFFER/MEMORY
 * resources are handled with a per-lane loop that issues LLVM atomicrmw
 * (or cmpxchg for ATOMCAS) on the addressed dword, collecting the old
 * values into a result vector.  Inactive lanes produce 0.
 * NOTE(review): function name line, braces, break statements in the
 * switch, and several statements are missing from this extraction.
 */
3762 const struct lp_build_tgsi_action * action,
3763 struct lp_build_tgsi_context * bld_base,
3764 struct lp_build_emit_data * emit_data)
3766 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3767 struct gallivm_state * gallivm = bld_base->base.gallivm;
3768 LLVMBuilderRef builder = gallivm->builder;
3769 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3770 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3772 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3773 unsigned buf = bufreg->Register.Index;
3774 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
/* TGSI atomic opcode -> LLVM RMW binop (ATOMCAS handled separately). */
3776 LLVMAtomicRMWBinOp op = -1;
3777 switch (emit_data->inst->Instruction.Opcode) {
3778 case TGSI_OPCODE_ATOMUADD:
3779 op = LLVMAtomicRMWBinOpAdd;
3781 case TGSI_OPCODE_ATOMXCHG:
3782 op = LLVMAtomicRMWBinOpXchg;
3784 case TGSI_OPCODE_ATOMAND:
3785 op = LLVMAtomicRMWBinOpAnd;
3787 case TGSI_OPCODE_ATOMOR:
3788 op = LLVMAtomicRMWBinOpOr;
3790 case TGSI_OPCODE_ATOMXOR:
3791 op = LLVMAtomicRMWBinOpXor;
3793 case TGSI_OPCODE_ATOMUMIN:
3794 op = LLVMAtomicRMWBinOpUMin;
3796 case TGSI_OPCODE_ATOMUMAX:
3797 op = LLVMAtomicRMWBinOpUMax;
3799 case TGSI_OPCODE_ATOMIMIN:
3800 op = LLVMAtomicRMWBinOpMin;
3802 case TGSI_OPCODE_ATOMIMAX:
3803 op = LLVMAtomicRMWBinOpMax;
3805 case TGSI_OPCODE_ATOMCAS:
3812 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3813 img_atomic_emit(action, bld_base, emit_data, op);
3816 LLVMValueRef index; /* index into the const buffer */
3817 LLVMValueRef scalar, scalar_ptr;
/* Byte offset (src 1) -> dword index; operand value from src 2. */
3820 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3821 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3823 index = lp_build_shr_imm(uint_bld, index, 2);
3826 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3827 scalar_ptr = bld->ssbos[buf];
3829 scalar_ptr = bld->shared_ptr;
3831 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3832 uint_bld->vec_type, "");
3834 LLVMValueRef ssbo_limit;
3836 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3837 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3840 LLVMValueRef exec_mask = mask_vec(bld_base);
3843 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3844 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
/* Per-lane loop: one atomic per active lane. */
3847 struct lp_build_loop_state loop_state;
3848 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3850 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3851 loop_state.counter, "");
3852 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3854 index = LLVMBuildExtractElement(gallivm->builder, index,
3855 loop_state.counter, "");
3857 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3860 struct lp_build_if_state ifthen;
3861 LLVMValueRef cond, temp_res;
3863 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3864 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3865 lp_build_if(&ifthen, gallivm, cond);
3867 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3868 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3869 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3870 loop_state.counter, "");
3871 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3872 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3874 LLVMAtomicOrderingSequentiallyConsistent,
3875 LLVMAtomicOrderingSequentiallyConsistent,
/* cmpxchg returns {old value, success flag}; keep the old value. */
3877 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3879 scalar = LLVMBuildAtomicRMW(builder, op,
3880 scalar_ptr, value_ptr,
3881 LLVMAtomicOrderingSequentiallyConsistent,
3884 temp_res = LLVMBuildLoad(builder, atom_res, "");
3885 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3886 LLVMBuildStore(builder, temp_res, atom_res);
3887 lp_build_else(&ifthen);
3888 temp_res = LLVMBuildLoad(builder, atom_res, "");
3889 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3890 LLVMBuildStore(builder, temp_res, atom_res);
3891 lp_build_endif(&ifthen);
3893 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3895 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
/* BARRIER: implemented as a coroutine suspend/resume point -- all
 * invocations yield here and resume in a fresh basic block, giving
 * barrier semantics for compute shaders (function name line missing).
 */
3901 const struct lp_build_tgsi_action * action,
3902 struct lp_build_tgsi_context * bld_base,
3903 struct lp_build_emit_data * emit_data)
3905 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3906 struct gallivm_state * gallivm = bld_base->base.gallivm;
3908 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3910 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3911 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
/* MEMBAR: memory barrier, lowered to an LLVM seq_cst fence (function
 * name line missing).
 */
3916 const struct lp_build_tgsi_action * action,
3917 struct lp_build_tgsi_context * bld_base,
3918 struct lp_build_emit_data * emit_data)
3920 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3921 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
/* Add 1 to each lane of *ptr where the mask is active.  The mask lanes
 * are all-ones (-1) when active, so subtracting the mask increments the
 * active lanes.  NOTE(review): some parameter lines and braces are
 * missing from this extraction.
 */
3925 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3929 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3930 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3932 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3934 LLVMBuildStore(builder, current_vec, ptr);
/* Zero the lanes of *ptr selected by the mask, leaving the others. */
3938 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3942 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3943 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3945 current_vec = lp_build_select(&bld_base->uint_bld,
3947 bld_base->uint_bld.zero,
3950 LLVMBuildStore(builder, current_vec, ptr);
/* AND the current mask with (total emitted < max output vertices) so a
 * GS cannot emit beyond its declared maximum vertex count.
 */
3954 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3955 LLVMValueRef current_mask_vec,
3956 LLVMValueRef total_emitted_vertices_vec)
3958 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3959 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3960 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3961 total_emitted_vertices_vec,
3962 bld->max_output_vertices_vec);
3964 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
/* TGSI EMIT (geometry shader emit_vertex): clamp the exec mask against
 * the declared max vertex count, gather the current outputs, hand them
 * to the GS interface, then bump the per-primitive and total emitted
 * vertex counters for the active lanes.  NOTE(review): function name
 * line, braces and a debug guard are missing from this extraction.
 */
3969 const struct lp_build_tgsi_action * action,
3970 struct lp_build_tgsi_context * bld_base,
3971 struct lp_build_emit_data * emit_data)
3973 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3974 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3976 if (bld->gs_iface->emit_vertex) {
/* Stream id is an immediate swizzled from Src[0]. */
3977 LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3979 emit_data->inst->Src[0].Register.SwizzleX);
3980 LLVMValueRef mask = mask_vec(bld_base);
3981 LLVMValueRef total_emitted_vertices_vec =
3982 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3984 mask = clamp_mask_to_max_output_vertices(bld, mask,
3985 total_emitted_vertices_vec);
3986 gather_outputs(bld);
3987 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3989 total_emitted_vertices_vec,
3992 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3994 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
/* Debug tracing (presumably under a DEBUG_EXECUTION-style guard whose
 * line is missing here). */
3997 lp_build_print_value(bld->bld_base.base.gallivm,
3998 " +++ emit vertex masked ones = ",
4000 lp_build_print_value(bld->bld_base.base.gallivm,
4001 " +++ emit vertex emitted = ",
4002 total_emitted_vertices_vec);
/* Finish the current GS primitive for the lanes selected by 'mask':
 * only lanes that actually have unflushed vertices take part; the GS
 * interface is notified, the emitted-primitive counter is bumped and the
 * per-primitive vertex counter is reset for those lanes.
 * NOTE(review): return type line, braces and a debug guard are missing
 * from this extraction.
 */
4009 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
4012 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4013 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
4015 if (bld->gs_iface->end_primitive) {
4016 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4017 LLVMValueRef emitted_vertices_vec =
4018 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
4019 LLVMValueRef emitted_prims_vec =
4020 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4021 LLVMValueRef total_emitted_vertices_vec =
4022 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4023 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4024 emitted_vertices_vec,
4026 /* We need to combine the current execution mask with the mask
4027 telling us which, if any, execution slots actually have
4028 unemitted primitives, this way we make sure that end_primitives
4029 executes only on the paths that have unflushed vertices */
4030 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4032 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4033 total_emitted_vertices_vec,
4034 emitted_vertices_vec,
4036 mask_vec(bld_base), 0);
/* Debug tracing (guard line missing from this extraction). */
4039 lp_build_print_value(bld->bld_base.base.gallivm,
4040 " +++ end prim masked ones = ",
4042 lp_build_print_value(bld->bld_base.base.gallivm,
4043 " +++ end prim emitted verts1 = ",
4044 emitted_vertices_vec);
4045 lp_build_print_value(bld->bld_base.base.gallivm,
4046 " +++ end prim emitted prims1 = ",
4047 LLVMBuildLoad(builder,
4048 bld->emitted_prims_vec_ptr, ""));
4050 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4052 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4055 lp_build_print_value(bld->bld_base.base.gallivm,
4056 " +++ end prim emitted verts2 = ",
4057 LLVMBuildLoad(builder,
4058 bld->emitted_vertices_vec_ptr, ""));
/* TGSI ENDPRIM: end the current GS primitive under the current exec mask
 * (function name line missing from this extraction).
 */
4066 const struct lp_build_tgsi_action * action,
4067 struct lp_build_tgsi_context * bld_base,
4068 struct lp_build_emit_data * emit_data)
4070 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4072 if (bld->gs_iface->end_primitive) {
4073 LLVMValueRef mask = mask_vec(bld_base);
4074 end_primitive_masked(bld_base, mask);
/* TCS BARRIER: forwarded to the tessellation-control interface
 * (function name line missing from this extraction).
 */
4080 const struct lp_build_tgsi_action * action,
4081 struct lp_build_tgsi_context * bld_base,
4082 struct lp_build_emit_data * emit_data)
4084 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4086 if (bld->tcs_iface->emit_barrier) {
4087 bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
/* Control-flow opcode trampolines.  Each callback recovers the SoA
 * context and forwards to the corresponding lp_exec_* helper, which
 * maintains the structured execution mask.  NOTE(review): all function
 * name lines and braces are missing from this extraction; the opcode
 * each fragment handles is inferred from the helper it calls.
 */
/* CAL: push a call frame and jump to the subroutine label. */
4094 const struct lp_build_tgsi_action * action,
4095 struct lp_build_tgsi_context * bld_base,
4096 struct lp_build_emit_data * emit_data)
4098 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4100 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
/* RET: return from subroutine via the exec-mask machinery. */
4106 const struct lp_build_tgsi_action * action,
4107 struct lp_build_tgsi_context * bld_base,
4108 struct lp_build_emit_data * emit_data)
4110 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4112 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
/* BRK: break out of the innermost loop/switch. */
4117 const struct lp_build_tgsi_action * action,
4118 struct lp_build_tgsi_context * bld_base,
4119 struct lp_build_emit_data * emit_data)
4121 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4123 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
/* IF: float condition -- push (arg != 0.0) onto the cond-mask stack. */
4128 const struct lp_build_tgsi_action * action,
4129 struct lp_build_tgsi_context * bld_base,
4130 struct lp_build_emit_data * emit_data)
4133 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4135 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4136 emit_data->args[0], bld->bld_base.base.zero);
4137 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
/* UIF: unsigned-integer condition variant of IF. */
4142 const struct lp_build_tgsi_action * action,
4143 struct lp_build_tgsi_context * bld_base,
4144 struct lp_build_emit_data * emit_data)
4147 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4148 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4150 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4151 emit_data->args[0], uint_bld->zero);
4152 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
/* CASE: add a switch case label. */
4157 const struct lp_build_tgsi_action * action,
4158 struct lp_build_tgsi_context * bld_base,
4159 struct lp_build_emit_data * emit_data)
4161 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4163 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
/* DEFAULT: switch default label. */
4168 const struct lp_build_tgsi_action * action,
4169 struct lp_build_tgsi_context * bld_base,
4170 struct lp_build_emit_data * emit_data)
4172 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4174 lp_exec_default(&bld->exec_mask, bld_base);
/* SWITCH: open a switch on the given value. */
4179 const struct lp_build_tgsi_action * action,
4180 struct lp_build_tgsi_context * bld_base,
4181 struct lp_build_emit_data * emit_data)
4183 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4185 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
/* ENDSWITCH: close the switch. */
4190 const struct lp_build_tgsi_action * action,
4191 struct lp_build_tgsi_context * bld_base,
4192 struct lp_build_emit_data * emit_data)
4194 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4196 lp_exec_endswitch(&bld->exec_mask, bld_base);
/* BGNLOOP: open a loop scope. */
4201 const struct lp_build_tgsi_action * action,
4202 struct lp_build_tgsi_context * bld_base,
4203 struct lp_build_emit_data * emit_data)
4205 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4207 lp_exec_bgnloop(&bld->exec_mask, true);
/* BGNSUB: open a subroutine scope. */
4212 const struct lp_build_tgsi_action * action,
4213 struct lp_build_tgsi_context * bld_base,
4214 struct lp_build_emit_data * emit_data)
4216 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4218 lp_exec_mask_bgnsub(&bld->exec_mask);
/* ELSE: invert the top condition mask. */
4223 const struct lp_build_tgsi_action * action,
4224 struct lp_build_tgsi_context * bld_base,
4225 struct lp_build_emit_data * emit_data)
4227 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4229 lp_exec_mask_cond_invert(&bld->exec_mask);
/* ENDIF: pop the condition mask. */
4234 const struct lp_build_tgsi_action * action,
4235 struct lp_build_tgsi_context * bld_base,
4236 struct lp_build_emit_data * emit_data)
4238 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4240 lp_exec_mask_cond_pop(&bld->exec_mask);
/* ENDLOOP: close the loop scope. */
4245 const struct lp_build_tgsi_action * action,
4246 struct lp_build_tgsi_context * bld_base,
4247 struct lp_build_emit_data * emit_data)
4249 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4251 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
/* ENDSUB: close the subroutine scope. */
4256 const struct lp_build_tgsi_action * action,
4257 struct lp_build_tgsi_context * bld_base,
4258 struct lp_build_emit_data * emit_data)
4260 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4262 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
/* CONT: continue to the next loop iteration. */
4267 const struct lp_build_tgsi_action * action,
4268 struct lp_build_tgsi_context * bld_base,
4269 struct lp_build_emit_data * emit_data)
4271 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4273 lp_exec_continue(&bld->exec_mask);
/* Shader prologue: allocate backing arrays for register files that are
 * indirectly addressed (temps, outputs, immediates, inputs), copy inputs
 * into their array when needed, and for geometry shaders set up and zero
 * the emitted-prims / emitted-vertices / total-emitted-vertices counters.
 * NOTE(review): braces and some statements are missing from this
 * extraction.
 */
4276 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4278 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4279 struct gallivm_state * gallivm = bld_base->base.gallivm;
/* Indirect temporaries: one vec per channel, file_max*4+4 entries. */
4281 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4282 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4283 bld->temps_array = lp_build_alloca_undef(gallivm,
4284 LLVMArrayType(bld_base->base.vec_type, array_size),
4288 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4289 LLVMValueRef array_size =
4290 lp_build_const_int32(gallivm,
4291 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4292 bld->outputs_array = lp_build_array_alloca(gallivm,
4293 bld_base->base.vec_type, array_size,
4297 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4298 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4299 bld->imms_array = lp_build_alloca_undef(gallivm,
4300 LLVMArrayType(bld_base->base.vec_type, array_size),
4304 /* If we have indirect addressing in inputs we need to copy them into
4305 * our alloca array to be able to iterate over them */
4306 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4307 !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4308 unsigned index, chan;
4309 LLVMTypeRef vec_type = bld_base->base.vec_type;
4310 LLVMValueRef array_size = lp_build_const_int32(gallivm,
4311 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4312 bld->inputs_array = lp_build_array_alloca(gallivm,
4313 vec_type, array_size,
4316 assert(bld_base->info->num_inputs
4317 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4319 for (index = 0; index < bld_base->info->num_inputs; ++index) {
4320 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4321 LLVMValueRef lindex =
4322 lp_build_const_int32(gallivm, index * 4 + chan);
4323 LLVMValueRef input_ptr =
4324 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4326 LLVMValueRef value = bld->inputs[index][chan];
4328 LLVMBuildStore(gallivm->builder, value, input_ptr);
/* Geometry shader: allocate and zero the vertex/prim counters used by
 * emit_vertex()/end_primitive_masked() above. */
4333 if (bld->gs_iface) {
4334 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4335 bld->emitted_prims_vec_ptr =
4336 lp_build_alloca(gallivm,
4338 "emitted_prims_ptr");
4339 bld->emitted_vertices_vec_ptr =
4340 lp_build_alloca(gallivm,
4342 "emitted_vertices_ptr");
4343 bld->total_emitted_vertices_vec_ptr =
4344 lp_build_alloca(gallivm,
4346 "total_emitted_vertices_ptr");
4348 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4349 bld->emitted_prims_vec_ptr);
4350 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4351 bld->emitted_vertices_vec_ptr);
4352 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4353 bld->total_emitted_vertices_vec_ptr);
4356 if (DEBUG_EXECUTION) {
4357 lp_build_printf(gallivm, "\n");
4358 emit_dump_file(bld, TGSI_FILE_CONSTANT);
4360 emit_dump_file(bld, TGSI_FILE_INPUT);
/* Post-declaration prologue hook: currently only forwards to the TCS
 * interface's emit_prologue, if present.
 */
4364 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4366 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4368 if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4369 bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
/* Shader epilogue: debug dumps, TCS epilogue hook, and for geometry
 * shaders an implicit end-primitive flush plus gs_epilogue() with the
 * final vertex/prim counts, then gathering the outputs.
 * NOTE(review): braces and some statements are missing from this
 * extraction.
 */
4373 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4375 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4376 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4378 if (DEBUG_EXECUTION) {
4381 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4383 emit_dump_file(bld, TGSI_FILE_OUTPUT);
4384 lp_build_printf(bld_base->base.gallivm, "\n");
4387 if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4388 bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4391 /* If we have indirect addressing in outputs we need to copy our alloca array
4392 * to the outputs slots specified by the caller */
4393 if (bld->gs_iface) {
4394 LLVMValueRef total_emitted_vertices_vec;
4395 LLVMValueRef emitted_prims_vec;
4396 /* implicit end_primitives, needed in case there are any unflushed
4397 vertices in the cache. Note must not call end_primitive here
4398 since the exec_mask is not valid at this point. */
4399 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4401 total_emitted_vertices_vec =
4402 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4404 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4406 bld->gs_iface->gs_epilogue(bld->gs_iface,
4407 total_emitted_vertices_vec,
4408 emitted_prims_vec, 0);
4410 gather_outputs(bld);
4415 lp_build_tgsi_soa(struct gallivm_state *gallivm,
4416 const struct tgsi_token *tokens,
4417 const struct lp_build_tgsi_params *params,
4418 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
4420 struct lp_build_tgsi_soa_context bld;
4421 struct lp_type type = params->type;
4422 struct lp_type res_type;
4424 assert(type.length <= LP_MAX_VECTOR_LENGTH);
4425 memset(&res_type, 0, sizeof res_type);
4426 res_type.width = type.width;
4427 res_type.length = type.length;
4430 /* Setup build context */
4431 memset(&bld, 0, sizeof bld);
4432 lp_build_context_init(&bld.bld_base.base, gallivm, type);
4433 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4434 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4435 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
4437 struct lp_type dbl_type;
4439 dbl_type.width *= 2;
4440 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4443 struct lp_type uint64_type;
4444 uint64_type = lp_uint_type(type);
4445 uint64_type.width *= 2;
4446 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4449 struct lp_type int64_type;
4450 int64_type = lp_int_type(type);
4451 int64_type.width *= 2;
4452 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4454 bld.mask = params->mask;
4455 bld.inputs = params->inputs;
4456 bld.outputs = outputs;
4457 bld.consts_ptr = params->consts_ptr;
4458 bld.ssbo_ptr = params->ssbo_ptr;
4459 bld.sampler = params->sampler;
4460 bld.bld_base.info = params->info;
4461 bld.indirect_files = params->info->indirect_files;
4462 bld.context_ptr = params->context_ptr;
4463 bld.thread_data_ptr = params->thread_data_ptr;
4464 bld.image = params->image;
4465 bld.shared_ptr = params->shared_ptr;
4466 bld.coro = params->coro;
4469 * If the number of temporaries is rather large then we just
4470 * allocate them as an array right from the start and treat
4471 * like indirect temporaries.
4473 if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4474 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4477 * For performance reason immediates are always backed in a static
4478 * array, but if their number is too great, we have to use just
4479 * a dynamically allocated array.
4481 bld.use_immediates_array =
4482 (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4483 if (bld.use_immediates_array) {
4484 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4488 bld.bld_base.soa = TRUE;
4489 bld.bld_base.emit_debug = emit_debug;
4490 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4491 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4492 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4493 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4494 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4496 bld.bld_base.emit_store = emit_store;
4497 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
4498 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
4499 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;
4501 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4502 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4504 bld.bld_base.emit_prologue = emit_prologue;
4505 bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
4506 bld.bld_base.emit_epilogue = emit_epilogue;
4508 /* Set opcode actions */
4509 lp_set_default_actions_cpu(&bld.bld_base);
4511 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4512 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4513 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4514 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4515 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4516 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4517 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4518 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4519 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4520 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4521 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4522 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4523 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4524 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4525 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4526 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4527 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4528 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4529 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4530 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4531 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4532 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4533 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4534 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4535 bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
4536 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4537 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4538 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4539 bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
4540 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4541 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4542 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4543 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4544 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4545 /* DX10 sampling ops */
4546 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4547 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4548 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4549 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4550 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4551 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4552 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4553 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4554 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4555 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4556 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4558 bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4559 bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4560 bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4562 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4563 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4564 bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4565 bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4566 bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4567 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4568 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4569 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4570 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4571 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4573 bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4574 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4576 if (params->gs_iface) {
4577 /* There's no specific value for this because it should always
4578 * be set, but apps using ext_geometry_shader4 quite often
4579 * were forgetting so we're using MAX_VERTEX_VARYING from
4580 * that spec even though we could assert if it's not
4581 * set, but that's a lot uglier. */
4582 uint max_output_vertices;
4584 /* inputs are always indirect with gs */
4585 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4586 bld.gs_iface = params->gs_iface;
4587 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4588 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4589 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4591 max_output_vertices =
4592 params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4593 if (!max_output_vertices)
4594 max_output_vertices = 32;
4596 bld.max_output_vertices_vec =
4597 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4598 max_output_vertices);
4601 if (params->tes_iface) {
4602 /* inputs are always indirect with tes */
4603 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4604 bld.tes_iface = params->tes_iface;
4605 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
4608 if (params->tcs_iface) {
4609 bld.tcs_iface = params->tcs_iface;
4610 /* outputs and inputs are always indirect with tcs */
4611 bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
4612 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
4613 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4614 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
4615 bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
4616 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
4619 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4621 bld.system_values = *params->system_values;
4623 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4626 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4627 LLVMValueRef function = LLVMGetBasicBlockParent(block);
4628 debug_printf("11111111111111111111111111111 \n");
4629 tgsi_dump(tokens, 0);
4630 lp_debug_dump_value(function);
4631 debug_printf("2222222222222222222222222222 \n");
4635 LLVMModuleRef module = LLVMGetGlobalParent(
4636 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4637 LLVMDumpModule(module);
4640 lp_exec_mask_fini(&bld.exec_mask);