From f7f0d31fcccf1da67149c8569c3bd6cbc846ab9a Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Sat, 15 Apr 2023 15:51:41 +0800 Subject: [PATCH] nir,ac/llvm,radeonsi: replace nir_load_smem_buffer_amd with nir_load_ubo MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Both intrinsics compile to the same instruction. nir_load_smem_buffer_amd existed only because, at the time it was introduced, radeonsi did not support passing a buffer descriptor directly to nir_load_ubo. Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 9 --------- src/compiler/nir/nir_divergence_analysis.c | 1 - src/compiler/nir/nir_intrinsics.py | 5 ----- src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 8 +++++--- src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c | 4 ++-- 5 files changed, 7 insertions(+), 20 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 4c538e8..904dc29 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4194,15 +4194,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); break; } - case nir_intrinsic_load_smem_buffer_amd: { - LLVMValueRef descriptor = get_src(ctx, instr->src[0]); - LLVMValueRef offset = get_src(ctx, instr->src[1]); - unsigned num_components = instr->dest.ssa.num_components; - - result = ac_build_buffer_load(&ctx->ac, descriptor, num_components, NULL, offset, NULL, - ctx->ac.i32, 0, true, true); - break; - } case nir_intrinsic_ordered_xfb_counter_add_amd: { /* must be called in a single lane of a workgroup. */ /* TODO: Add RADV support. 
*/ diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 15695d6..be83387 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -184,7 +184,6 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_tess_level_outer_default: case nir_intrinsic_load_scalar_arg_amd: case nir_intrinsic_load_smem_amd: - case nir_intrinsic_load_smem_buffer_amd: case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd: case nir_intrinsic_load_global_const_block_intel: case nir_intrinsic_load_reloc_const_intel: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index f2778cb..9cb0125 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1533,11 +1533,6 @@ intrinsic("load_smem_amd", src_comp=[1, 1], dest_comp=0, bit_sizes=[32], indices=[ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER]) -# src[] = { descriptor, offset } -intrinsic("load_smem_buffer_amd", src_comp=[4, 1], dest_comp=0, bit_sizes=[32], - indices=[ALIGN_MUL, ALIGN_OFFSET], - flags=[CAN_ELIMINATE, CAN_REORDER]) - # src[] = { offset }. intrinsic("load_shared2_amd", [1], dest_comp=2, indices=[OFFSET0, OFFSET1, ST64], flags=[CAN_ELIMINATE]) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 473f035..a3a7a6f 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -256,7 +256,8 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s unsigned num_components = intrin->dest.ssa.num_components; unsigned offset = intrin->intrinsic == nir_intrinsic_load_tess_level_inner_default ? 
16 : 0; - replacement = nir_load_smem_buffer_amd(b, num_components, buf, nir_imm_int(b, offset)); + replacement = nir_load_ubo(b, num_components, 32, buf, nir_imm_int(b, offset), + .range = ~0); break; } case nir_intrinsic_load_patch_vertices_in: @@ -361,7 +362,8 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s case nir_intrinsic_load_user_clip_plane: { nir_ssa_def *buf = si_nir_load_internal_binding(b, args, SI_VS_CONST_CLIP_PLANES, 4); unsigned offset = nir_intrinsic_ucp_id(intrin) * 16; - replacement = nir_load_smem_buffer_amd(b, 4, buf, nir_imm_int(b, offset)); + replacement = nir_load_ubo(b, 4, 32, buf, nir_imm_int(b, offset), + .range = ~0); break; } case nir_intrinsic_load_streamout_buffer_amd: { @@ -453,7 +455,7 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s nir_ssa_def *offset = nir_ishl_imm(b, sample_id, 3); nir_ssa_def *buf = si_nir_load_internal_binding(b, args, SI_PS_CONST_SAMPLE_POSITIONS, 4); - nir_ssa_def *sample_pos = nir_load_smem_buffer_amd(b, 2, buf, offset); + nir_ssa_def *sample_pos = nir_load_ubo(b, 2, 32, buf, offset, .range = ~0); sample_pos = nir_fsub(b, sample_pos, nir_imm_float(b, 0.5)); diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c index ce7f862..96fcb28 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c @@ -77,8 +77,8 @@ get_vertex_index_for_mono_shader(nir_builder *b, int input_index, index = instance_id; } else { nir_ssa_def *offset = nir_imm_int(b, input_index * 16); - nir_ssa_def *divisor = - nir_load_smem_buffer_amd(b, 4, s->instance_divisor_constbuf, offset); + nir_ssa_def *divisor = nir_load_ubo(b, 4, 32, s->instance_divisor_constbuf, offset, + .range = ~0); /* The faster NUW version doesn't work when InstanceID == UINT_MAX. * Such InstanceID might not be achievable in a reasonable time though. -- 2.7.4