From e5bf6b70135fc39483247fbb385d514f3516c4af Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 22 Jul 2019 08:29:42 +1000 Subject: [PATCH] gallivm: add support for compute shared memory Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 3 ++ src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c | 2 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 60 ++++++++++++++++-------- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 5bc2f67..ce042a1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -251,6 +251,7 @@ struct lp_build_tgsi_params { LLVMValueRef ssbo_ptr; LLVMValueRef ssbo_sizes_ptr; const struct lp_build_image_soa *image; + LLVMValueRef shared_ptr; }; void @@ -483,6 +484,8 @@ struct lp_build_tgsi_soa_context LLVMValueRef ssbos[LP_MAX_TGSI_SHADER_BUFFERS]; LLVMValueRef ssbo_sizes[LP_MAX_TGSI_SHADER_BUFFERS]; + LLVMValueRef shared_ptr; + const struct lp_build_sampler_soa *sampler; const struct lp_build_image_soa *image; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c index 672d3a5..9738770 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c @@ -311,6 +311,8 @@ analyse_instruction(struct analysis_context *ctx, continue; } else if (dst->File == TGSI_FILE_IMAGE) { continue; + } else if (dst->File == TGSI_FILE_MEMORY) { + continue; } else { assert(0); continue; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index e097dcf..ef711b4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -3014,6 +3014,8 @@ lp_emit_declaration_soa( } break; + case TGSI_FILE_MEMORY: + break; default: /* don't need to declare other vars */ break; @@ -3492,7 +3494,8 @@ load_emit( LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; unsigned buf = bufreg->Register.Index; - assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); + assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY); + bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY; struct lp_build_context *uint_bld = &bld_base->uint_bld; if (bufreg->Register.File == TGSI_FILE_IMAGE) @@ -3507,19 +3510,23 @@ load_emit( index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0); index = lp_build_shr_imm(uint_bld, index, 2); - scalar_ptr = bld->ssbos[buf]; + scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf]; LLVMValueRef ssbo_limit; - ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); - ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + if (!is_shared) { + ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); + ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + } TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) { LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index)); LLVMValueRef exec_mask = mask_vec(bld_base); - LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); - exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + if (!is_shared) { + LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); + exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + } LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, ""); struct lp_build_loop_state loop_state; @@ -3606,7 +3613,8 @@ store_emit( struct lp_build_context *uint_bld = &bld_base->uint_bld; const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0]; unsigned buf = bufreg->Register.Index; - assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); + assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY); + bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY; if (bufreg->Register.File == TGSI_FILE_IMAGE) { img_store_emit(action, bld_base, emit_data); @@ -3621,12 +3629,14 @@ store_emit( index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0); index = lp_build_shr_imm(uint_bld, index, 2); - scalar_ptr = bld->ssbos[buf]; + scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf]; LLVMValueRef ssbo_limit; - ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); - ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + if (!is_shared) { + ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); + ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + } TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) { LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index)); @@ -3634,8 +3644,10 @@ store_emit( value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index); LLVMValueRef exec_mask = mask_vec(bld_base); - LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); - exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + if (!is_shared) { + LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); + exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + } struct lp_build_loop_state loop_state; lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); @@ -3756,8 +3768,9 @@ atomic_emit( struct lp_build_context *uint_bld = &bld_base->uint_bld; const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; - assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); + assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY); unsigned buf = bufreg->Register.Index; + bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY; LLVMAtomicRMWBinOp op; switch (emit_data->inst->Instruction.Opcode) { @@ -3807,20 +3820,28 @@ atomic_emit( value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0); index = lp_build_shr_imm(uint_bld, index, 2); - index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan)); - scalar_ptr = bld->ssbos[buf]; + if (!is_shared) { + index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan)); + scalar_ptr = bld->ssbos[buf]; + } else + scalar_ptr = bld->shared_ptr; LLVMValueRef atom_res = lp_build_alloca(gallivm, uint_bld->vec_type, ""); LLVMValueRef ssbo_limit; - ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); - ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + if (!is_shared) { + ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); + ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + } LLVMValueRef exec_mask = mask_vec(bld_base); - LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit); - exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + + if (!is_shared) { + LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit); + exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + } struct lp_build_loop_state loop_state; lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); @@ -4391,6 +4412,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.context_ptr = params->context_ptr; bld.thread_data_ptr = params->thread_data_ptr; bld.image = params->image; + bld.shared_ptr = params->shared_ptr; /* * If the number of temporaries is rather large then we just -- 2.7.4