radeonsi: add si_nir_lower_resource pass
authorQiang Yu <yuq825@gmail.com>
Tue, 16 Aug 2022 10:29:03 +0000 (18:29 +0800)
committerQiang Yu <yuq825@gmail.com>
Mon, 19 Dec 2022 01:20:50 +0000 (09:20 +0800)
Replace the load_ubo abi.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18666>

src/gallium/drivers/radeonsi/meson.build
src/gallium/drivers/radeonsi/si_nir_lower_resource.c [new file with mode: 0644]
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_llvm_resources.c

index 18099fa..40e9f25 100644 (file)
@@ -46,6 +46,7 @@ files_libradeonsi = files(
   'si_query.c',
   'si_query.h',
   'si_nir_lower_abi.c',
+  'si_nir_lower_resource.c',
   'si_nir_optim.c',
   'si_sdma_copy_image.c',
   'si_shader.c',
diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c
new file mode 100644 (file)
index 0000000..02149d9
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass converts index-based buffer/image/texture access to
+ * explicit descriptor-based access, which simplifies the compiler backend translation.
+ *
+ * For example: load_ubo(1) -> load_ubo(vec4), where the vec4 is the buffer
+ * descriptor with index==1, so the compiler backend doesn't need to do the index-to-descriptor
+ * lookup, which is the most complicated part (now moved to NIR).
+ */
+
+#include "nir_builder.h"
+
+#include "ac_nir.h"
+#include "si_pipe.h"
+#include "si_shader_internal.h"
+#include "sid.h"
+
+struct lower_resource_state {
+   struct si_shader *shader;    /* shader being lowered; its selector is read for UBO/SSBO counts */
+   struct si_shader_args *args; /* supplies the const_and_shader_buffers argument to load */
+};
+
+/* Builds the constant buffer 0 descriptor from immediates instead of loading it from memory. */
+static nir_ssa_def *load_ubo_desc_fast_path(nir_builder *b, nir_ssa_def *addr_lo,
+                                            struct si_shader_selector *sel)
+{
+   /* Identity swizzle; the per-generation format bits are OR'ed in below. */
+   uint32_t dword3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                     S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+   if (sel->screen->info.gfx_level < GFX10) {
+      dword3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+   } else if (sel->screen->info.gfx_level < GFX11) {
+      dword3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
+                S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+   } else {
+      dword3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
+                S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+   }
+
+   nir_ssa_def *hi = nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(sel->screen->info.address32_hi));
+   nir_ssa_def *size = nir_imm_int(b, sel->info.constbuf0_num_slots * 16);
+   return nir_vec4(b, addr_lo, hi, size, nir_imm_int(b, dword3));
+}
+
+/* Caps index at max - 1: AND mask when max is a power of two or zero, else unsigned select. */
+static nir_ssa_def *clamp_index(nir_builder *b, nir_ssa_def *index, unsigned max)
+{
+   const unsigned last = max - 1;
+   if (util_is_power_of_two_or_zero(max))
+      return nir_iand_imm(b, index, last);
+
+   nir_ssa_def *limit = nir_imm_int(b, last);
+   return nir_bcsel(b, nir_uge(b, limit, index), index, limit);
+}
+
+/* Yields the descriptor for UBO "index": built inline on the fast path, loaded otherwise. */
+static nir_ssa_def *load_ubo_desc(nir_builder *b, nir_ssa_def *index,
+                                  struct lower_resource_state *s)
+{
+   struct si_shader_selector *sel = s->shader->selector;
+   const bool only_one_ubo = sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0;
+   nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->const_and_shader_buffers);
+
+   if (only_one_ubo)
+      return load_ubo_desc_fast_path(b, list, sel);
+
+   /* Skip past SI_NUM_SHADER_BUFFERS slots, then scale the slot by 16 to get the load offset. */
+   nir_ssa_def *slot = clamp_index(b, index, sel->info.base.num_ubos);
+   slot = nir_iadd_imm(b, slot, SI_NUM_SHADER_BUFFERS);
+   return nir_load_smem_amd(b, 4, list, nir_ishl_imm(b, slot, 4));
+}
+
+/* Replaces the index source of a load_ubo with its descriptor; true if intrin was rewritten. */
+static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
+                                     struct lower_resource_state *s)
+{
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_load_ubo: {
+      /* This path does not handle non-uniform UBO access. */
+      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));
+      nir_ssa_def *ubo_desc = load_ubo_desc(b, intrin->src[0].ssa, s);
+      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[0], ubo_desc);
+      return true;
+   }
+   default:
+      /* Every other intrinsic is left untouched. */
+      return false;
+   }
+}
+
+/* nir_shader_instructions_pass callback: forwards intrinsics to lower_resource_intrinsic(). */
+static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
+{
+   struct lower_resource_state *s = state;
+
+   /* Anything the lowering emits goes right before the instruction being visited. */
+   b->cursor = nir_before_instr(instr);
+
+   switch (instr->type) {
+   case nir_instr_type_intrinsic:
+      return lower_resource_intrinsic(b, nir_instr_as_intrinsic(instr), s);
+   default:
+      return false;
+   }
+}
+
+/* Applies lower_resource_instr to the instructions of nir and returns that pass's result. */
+bool si_nir_lower_resource(nir_shader *nir, struct si_shader *shader,
+                           struct si_shader_args *args)
+{
+   /* State shared with the per-instruction callback through its void pointer. */
+   struct lower_resource_state state;
+   state.shader = shader;
+   state.args = args;
+
+   return nir_shader_instructions_pass(nir, lower_resource_instr,
+                                       nir_metadata_dominance | nir_metadata_block_index, &state);
+}
index 68c7968..0c48478 100644 (file)
@@ -1886,6 +1886,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_
    if (sel->stage == MESA_SHADER_FRAGMENT && key->ps.mono.point_smoothing)
       NIR_PASS(progress, nir, nir_lower_point_smooth);
 
+   NIR_PASS(progress, nir, si_nir_lower_resource, shader, args);
+
    bool is_last_vgt_stage =
       (sel->stage == MESA_SHADER_VERTEX ||
        sel->stage == MESA_SHADER_TESS_EVAL ||
index e258371..be5c1fd 100644 (file)
@@ -189,6 +189,10 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
 /* si_nir_lower_abi.c */
 bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args);
 
+/* si_nir_lower_resource.c */
+bool si_nir_lower_resource(nir_shader *nir, struct si_shader *shader,
+                           struct si_shader_args *args);
+
 /* si_shader_llvm.c */
 bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
                      struct ac_shader_config *conf, struct ac_llvm_compiler *compiler,
index 3fcbe3b..2af7bea 100644 (file)
@@ -53,60 +53,6 @@ static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValue
    return index;
 }
 
-static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
-{
-   LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->args->const_and_shader_buffers);
-   struct si_shader_selector *sel = ctx->shader->selector;
-
-   /* Do the bounds checking with a descriptor, because
-    * doing computation and manual bounds checking of 64-bit
-    * addresses generates horrible VALU code with very high
-    * VGPR usage and very low SIMD occupancy.
-    */
-   ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
-
-   LLVMValueRef desc0, desc1;
-   desc0 = ptr;
-   desc1 = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
-
-   uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-                    S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
-   if (ctx->screen->info.gfx_level >= GFX11)
-      rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
-               S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
-   else if (ctx->screen->info.gfx_level >= GFX10)
-      rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
-               S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
-   else
-      rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-
-   LLVMValueRef desc_elems[] = {desc0, desc1,
-                                LLVMConstInt(ctx->ac.i32, sel->info.constbuf0_num_slots * 16, 0),
-                                LLVMConstInt(ctx->ac.i32, rsrc3, false)};
-
-   return ac_build_gather_values(&ctx->ac, desc_elems, 4);
-}
-
-static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
-{
-   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-   struct si_shader_selector *sel = ctx->shader->selector;
-
-   if (sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0) {
-      return load_const_buffer_desc_fast_path(ctx);
-   }
-
-   index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
-   index =
-      LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, SI_NUM_SHADER_BUFFERS, 0), "");
-
-   return ac_build_load_to_sgpr(&ctx->ac,
-                                ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->const_and_shader_buffers),
-                                index);
-}
-
 static LLVMValueRef load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write, bool non_uniform)
 {
    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
@@ -335,7 +281,6 @@ static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned
 
 void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
 {
-   ctx->abi.load_ubo = load_ubo;
    ctx->abi.load_ssbo = load_ssbo;
    ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
 }