{
LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
LLVMValueRef offset = get_src(bld_base, instr->src[1]);
+ bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[0]) && nir_src_is_always_uniform(instr->src[1]);
bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
- idx, offset, result);
+ index_and_offset_are_uniform, idx, offset, result);
}
static void visit_shared_load(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
LLVMValueRef offset = get_src(bld_base, instr->src[0]);
+ bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
- NULL, offset, result);
+ offset_is_uniform, NULL, offset, result);
}
static void visit_shared_store(struct lp_build_nir_context *bld_base,
/* for SSBO and shared memory */
void (*load_mem)(struct lp_build_nir_context *bld_base,
unsigned nc, unsigned bit_size,
+ bool index_and_offset_are_uniform,
LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
void (*store_mem)(struct lp_build_nir_context *bld_base,
unsigned writemask, unsigned nc, unsigned bit_size,
static void emit_load_mem(struct lp_build_nir_context *bld_base,
unsigned nc,
unsigned bit_size,
+ bool index_and_offset_are_uniform,
LLVMValueRef index,
LLVMValueRef offset,
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
+ /* If the address is uniform, then use the address from invocation 0 to load,
+ * and broadcast the result to all invocations. This is only safe when
+ * invocation 0 is known to be active, since an inactive lane's value can't
+ * be trusted.
+ */
+ if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
+ LLVMValueRef ssbo_limit;
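+ /* Base pointer of the SSBO selected by invocation 0's (uniform) index, or of
+ * shared memory when index is NULL, cast to the element type for this bit
+ * size. ssbo_limit receives the buffer size in elements, or NULL when there
+ * is no bound to enforce.
+ */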
+ LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
+ lp_build_const_int32(gallivm, 0), &ssbo_limit);
+
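+ /* The offset vector is uniform, so lane 0's scalar value addresses the same
+ * data for every invocation.
+ */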
+ offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
+
+ for (unsigned c = 0; c < nc; c++) {
+ LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
+
+ LLVMValueRef scalar;
+ /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
+ if (ssbo_limit) {
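+ /* Stage the result through an alloca pre-filled with 0 so the conditionally
+ * executed load below can pass its value out of the if block.
+ */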
+ LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
+ LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
+ LLVMBuildStore(builder, zero, res_store);
+
+ LLVMValueRef fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntUGT, ssbo_limit, chan_offset, "");
+ struct lp_build_if_state ifthen;
+ lp_build_if(&ifthen, gallivm, fetch_cond);
+ LLVMBuildStore(builder, lp_build_pointer_get(builder, mem_ptr, chan_offset), res_store);
+ lp_build_endif(&ifthen);
+
+ scalar = LLVMBuildLoad(builder, res_store, "");
+ } else {
+ scalar = lp_build_pointer_get(builder, mem_ptr, chan_offset);
+ }
+
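+ /* Splat the scalar result across all lanes of the SoA result vector. */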
+ outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
+ }
+ return;
+ }
+
/* Although the index is dynamically uniform, that doesn't help if the exec mask isn't set, so read the elements one by one. */
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];