amd: lower subdword UBO loads in NIR
author: Marek Olšák <marek.olsak@amd.com>
Sat, 29 Oct 2022 21:29:37 +0000 (17:29 -0400)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 3 Mar 2023 03:27:40 +0000 (03:27 +0000)
This fixes broken subdword UBO loads with LLVM.

It's only needed for LLVM, but it's done for both LLVM and ACO because
the pass can be fully validated only with ACO and the Vulkan CTS right now.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19399>

src/amd/llvm/ac_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c
src/gallium/drivers/radeonsi/si_shader_nir.c

index f721e21..0389682 100644 (file)
@@ -2324,34 +2324,17 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrin
    LLVMValueRef offset = get_src(ctx, instr->src[1]);
    int num_components = instr->num_components;
 
+   assert(instr->dest.ssa.bit_size >= 32 && instr->dest.ssa.bit_size % 32 == 0);
+
    if (ctx->abi->load_ubo)
       rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
 
-   /* Convert to a scalar 32-bit load. */
+   /* Convert to a 32-bit load. */
    if (instr->dest.ssa.bit_size == 64)
       num_components *= 2;
-   else if (instr->dest.ssa.bit_size == 16)
-      num_components = DIV_ROUND_UP(num_components, 2);
-   else if (instr->dest.ssa.bit_size == 8)
-      num_components = DIV_ROUND_UP(num_components, 4);
-
-   ret =
-      ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL,
-                           ctx->ac.f32, 0, true, true);
-
-   /* Convert to the original type. */
-   if (instr->dest.ssa.bit_size == 64) {
-      ret = LLVMBuildBitCast(ctx->ac.builder, ret,
-                             LLVMVectorType(ctx->ac.i64, num_components / 2), "");
-   } else if (instr->dest.ssa.bit_size == 16) {
-      ret = LLVMBuildBitCast(ctx->ac.builder, ret,
-                             LLVMVectorType(ctx->ac.i16, num_components * 2), "");
-   } else if (instr->dest.ssa.bit_size == 8) {
-      ret = LLVMBuildBitCast(ctx->ac.builder, ret,
-                             LLVMVectorType(ctx->ac.i8, num_components * 4), "");
-   }
 
-   ret = ac_trim_vector(&ctx->ac, ret, instr->num_components);
+   ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL,
+                              ctx->ac.f32, 0, true, true);
    ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
 
    return exit_waterfall(ctx, &wctx, ret);
index 3382e41..f16b217 100644 (file)
@@ -3182,6 +3182,12 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
       }
    }
 
+   NIR_PASS(_, stage->nir, ac_nir_lower_subdword_loads,
+            (ac_nir_lower_subdword_options) {
+               .modes_1_comp = nir_var_mem_ubo,
+               .modes_N_comps = nir_var_mem_ubo
+            });
+
    progress = false;
    NIR_PASS(progress, stage->nir, nir_vk_lower_ycbcr_tex, ycbcr_conversion_lookup, pipeline_layout);
    /* Gather info in the case that nir_vk_lower_ycbcr_tex might have emitted resinfo instructions. */
index 19786d0..624e411 100644 (file)
@@ -25,6 +25,7 @@
 #include "nir_builder.h"
 #include "nir_xfb_info.h"
 #include "si_pipe.h"
+#include "ac_nir.h"
 
 
 static bool si_alu_to_scalar_filter(const nir_instr *instr, const void *data)
@@ -358,6 +359,11 @@ char *si_finalize_nir(struct pipe_screen *screen, void *nirptr)
 
    nir_lower_io_passes(nir);
 
+   NIR_PASS_V(nir, ac_nir_lower_subdword_loads,
+              (ac_nir_lower_subdword_options) {
+                 .modes_1_comp = nir_var_mem_ubo,
+                 .modes_N_comps = nir_var_mem_ubo
+              });
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
 
    /* Remove dead derefs, so that we can remove uniforms. */