From 05b6612b4ec7c5386d4840d251d76123d4cee0c3 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 2 Jun 2020 13:20:34 +0200 Subject: [PATCH] radv: do not lower UBO/SSBO access to offsets Use nir_lower_explicit_io instead of lowering to offsets. Extra (useless) additions are removed by lowering load_vulkan_descriptor to vec2(src.x, 0). fossils-db (Navi): Totals from 18236 (13.21% of 138013) affected shaders: SGPRs: 1172766 -> 1168278 (-0.38%); split: -0.89%, +0.50% VGPRs: 940156 -> 952232 (+1.28%); split: -0.08%, +1.37% SpillSGPRs: 30286 -> 31109 (+2.72%); split: -0.78%, +3.50% SpillVGPRs: 1893 -> 1909 (+0.85%) CodeSize: 87910396 -> 88113592 (+0.23%); split: -0.35%, +0.58% Scratch: 819200 -> 823296 (+0.50%) MaxWaves: 205535 -> 202102 (-1.67%); split: +0.05%, -1.72% Instrs: 17052527 -> 17113484 (+0.36%); split: -0.32%, +0.67% Cycles: 670794876 -> 669084540 (-0.25%); split: -0.38%, +0.13% VMEM: 5274728 -> 5388556 (+2.16%); split: +3.10%, -0.94% SMEM: 1196146 -> 1165850 (-2.53%); split: +2.06%, -4.60% VClause: 381463 -> 399217 (+4.65%); split: -1.08%, +5.73% SClause: 666216 -> 631135 (-5.27%); split: -5.44%, +0.18% Copies: 1292720 -> 1289318 (-0.26%); split: -1.28%, +1.01% Branches: 467336 -> 473028 (+1.22%); split: -0.67%, +1.89% PreSGPRs: 766459 -> 772175 (+0.75%); split: -0.53%, +1.28% PreVGPRs: 819746 -> 825327 (+0.68%); split: -0.05%, +0.73% Signed-off-by: Samuel Pitoiset Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 10 +++++-- src/amd/vulkan/radv_meta.c | 4 +-- src/amd/vulkan/radv_shader.c | 41 +++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 2463ecf..4b92af6 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5031,7 +5031,12 @@ void visit_load_resource(isel_context *ctx, nir_intrinsic_instr *instr) 
Operand(desc_ptr)); } - bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), index); + Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); + std::array<Temp,NIR_MAX_VEC_COMPONENTS> elems; + elems[0] = index; + ctx->allocated_vec.emplace(dst.id(), elems); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), index, + Operand((unsigned)ctx->options->address32_hi)); } void load_buffer(isel_context *ctx, unsigned num_components, unsigned component_size, @@ -5062,7 +5067,8 @@ void visit_load_ubo(isel_context *ctx, nir_intrinsic_instr *instr) Builder bld(ctx->program, ctx->block); - nir_intrinsic_instr* idx_instr = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); + nir_alu_instr* mov_instr = nir_instr_as_alu(instr->src[0].ssa->parent_instr); + nir_intrinsic_instr* idx_instr = nir_instr_as_intrinsic(mov_instr->src[0].src.ssa->parent_instr); unsigned desc_set = nir_intrinsic_desc_set(idx_instr); unsigned binding = nir_intrinsic_binding(idx_instr); radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout; diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c index 1ff675b..a8e1f7a 100644 --- a/src/amd/vulkan/radv_meta.c +++ b/src/amd/vulkan/radv_meta.c @@ -666,11 +666,11 @@ radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding) nir_intrinsic_vulkan_resource_index); rsrc->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - rsrc->num_components = 1; + rsrc->num_components = 2; nir_intrinsic_set_desc_set(rsrc, desc_set); nir_intrinsic_set_binding(rsrc, binding); nir_ssa_dest_init(&rsrc->instr, &rsrc->dest, rsrc->num_components, 32, NULL); nir_builder_instr_insert(b, &rsrc->instr); - return &rsrc->dest.ssa; + return nir_channel(b, &rsrc->dest.ssa, 0); } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index bce321a..c44a7b4 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -348,6 +348,40 @@ static void radv_compiler_debug(void *private_data, 0, 0, "radv", message); } +static bool 
+lower_load_vulkan_descriptor(nir_shader *nir) +{ + nir_function_impl *entry = nir_shader_get_entrypoint(nir); + bool progress = false; + nir_builder b; + + nir_builder_init(&b, entry); + + nir_foreach_block(block, entry) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor) + continue; + + b.cursor = nir_before_instr(&intrin->instr); + + nir_ssa_def *def = nir_vec2(&b, + nir_channel(&b, intrin->src[0].ssa, 0), + nir_imm_int(&b, 0)); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(def)); + + nir_instr_remove(instr); + progress = true; + } + } + + return progress; +} + nir_shader * radv_shader_compile_to_nir(struct radv_device *device, struct radv_shader_module *module, @@ -414,7 +448,6 @@ radv_shader_compile_to_nir(struct radv_device *device, .module = module, }; const struct spirv_to_nir_options spirv_options = { - .lower_ubo_ssbo_access_to_offsets = true, .caps = { .amd_fragment_mask = true, .amd_gcn_shader = true, @@ -617,6 +650,12 @@ radv_shader_compile_to_nir(struct radv_device *device, */ nir_lower_var_copies(nir); + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_mem_ubo | nir_var_mem_ssbo, + nir_address_format_32bit_index_offset); + + NIR_PASS_V(nir, lower_load_vulkan_descriptor); + /* Lower deref operations for compute shared memory. */ if (nir->info.stage == MESA_SHADER_COMPUTE) { NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, -- 2.7.4