From 1165758b8bc3b8621257189c1a786d157906000b Mon Sep 17 00:00:00 2001
From: Qiang Yu <yuq825@gmail.com>
Date: Mon, 20 Mar 2023 12:33:26 +0800
Subject: [PATCH] ac/llvm,radeonsi: remove abi->load_inputs implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

No nir_load_input in VS now.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22045>
---
 src/amd/llvm/ac_nir_to_llvm.c                     |   3 -
 src/amd/llvm/ac_shader_abi.h                      |   5 -
 src/gallium/drivers/radeonsi/si_shader_internal.h |   1 -
 src/gallium/drivers/radeonsi/si_shader_llvm_vs.c  | 260 ----------------------
 4 files changed, 269 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index bf51487..e7db715 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3342,9 +3342,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
    /* No indirect indexing is allowed after this point. */
    assert(!indir_index);
 
-   if (ctx->stage == MESA_SHADER_VERTEX && !is_output)
-      return ctx->abi->load_inputs(ctx->abi, base, component, count, 0, component_type);
-
    /* Other non-fragment cases have outputs in temporaries. */
    if (is_output && (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
       assert(is_output);
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index ee56be0..5a9494c 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -66,11 +66,6 @@ struct ac_shader_abi {
    void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
                                     LLVMValueRef vertexidx, LLVMValueRef *addrs);
 
-   LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
-                               unsigned driver_location, unsigned component,
-                               unsigned num_components, unsigned vertex_index,
-                               LLVMTypeRef type);
-
    LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
                                       LLVMValueRef vertex_index, LLVMValueRef param_index,
                                       unsigned driver_location, unsigned component,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 00c0a93..c03dad7 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -239,6 +239,5 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx);
 /* si_shader_llvm_vs.c */
 void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key,
                              bool separate_prolog);
-void si_llvm_init_vs_callbacks(struct si_shader_context *ctx);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
index bb5fb2d..96e297b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -28,17 +28,6 @@
 #include "util/u_memory.h"
 #include "ac_nir.h"
 
-static LLVMValueRef unpack_sint16(struct si_shader_context *ctx, LLVMValueRef i32, unsigned index)
-{
-   assert(index <= 1);
-
-   if (index == 1)
-      return LLVMBuildAShr(ctx->ac.builder, i32, LLVMConstInt(ctx->ac.i32, 16, 0), "");
-
-   return LLVMBuildSExt(ctx->ac.builder, LLVMBuildTrunc(ctx->ac.builder, i32, ctx->ac.i16, ""),
-                        ctx->ac.i32, "");
-}
-
 static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
                                      struct si_vs_prolog_bits *key, unsigned input_index,
                                      LLVMValueRef instance_divisor_constbuf,
@@ -86,250 +75,6 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
    return index;
 }
 
-static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4])
-{
-   const struct si_shader_info *info = &ctx->shader->selector->info;
-   unsigned vs_blit_property = info->base.vs.blit_sgprs_amd;
-
-   if (vs_blit_property) {
-      LLVMValueRef vertex_id = ctx->abi.vertex_id;
-      LLVMValueRef sel_x1 =
-         LLVMBuildICmp(ctx->ac.builder, LLVMIntULE, vertex_id, ctx->ac.i32_1, "");
-      /* Use LLVMIntNE, because we have 3 vertices and only
-       * the middle one should use y2.
-       */
-      LLVMValueRef sel_y1 = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, vertex_id, ctx->ac.i32_1, "");
-
-      unsigned param_vs_blit_inputs = ctx->args->vs_blit_inputs.arg_index;
-      if (input_index == 0) {
-         /* Position: */
-         LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs);
-         LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 1);
-
-         LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0);
-         LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1);
-         LLVMValueRef x2 = unpack_sint16(ctx, x2y2, 0);
-         LLVMValueRef y2 = unpack_sint16(ctx, x2y2, 1);
-
-         LLVMValueRef x = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
-         LLVMValueRef y = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");
-
-         out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->ac.f32, "");
-         out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->ac.f32, "");
-         out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 2);
-         out[3] = ctx->ac.f32_1;
-         return;
-      }
-
-      /* Color or texture coordinates: */
-      assert(input_index == 1);
-
-      if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
-         for (int i = 0; i < 4; i++) {
-            out[i] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3 + i);
-         }
-      } else {
-         assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
-         LLVMValueRef x1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3);
-         LLVMValueRef y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 4);
-         LLVMValueRef x2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 5);
-         LLVMValueRef y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 6);
-
-         out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
-         out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");
-         out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 7);
-         out[3] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 8);
-      }
-      return;
-   }
-
-   /* Set can_speculate=false to help keep all loads grouped together
-    * for better latency hiding. If it was true, LLVM could move the loads forward
-    * and accidentally double memory latency by doing:
-    *
-    *    buffer_load_dword_xyzw
-    *    s_waitcnt vmcnt(0)
-    *    buffer_load_dword_xyzw
-    *    s_waitcnt vmcnt(0)
-    *
-    * ... which is what we must prevent at all cost.
-    */
-   const bool can_speculate = false;
-   unsigned bit_size = info->input[input_index].fp16_lo_hi_valid & 0x1 ? 16 : 32;
-   LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32;
-   LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32;
-   unsigned num_vbos_in_user_sgprs = ctx->shader->selector->info.num_vbos_in_user_sgprs;
-   union si_vs_fix_fetch fix_fetch;
-   LLVMValueRef vb_desc;
-   LLVMValueRef vertex_index = NULL;
-   LLVMValueRef tmp;
-
-   if (input_index < num_vbos_in_user_sgprs) {
-      vb_desc = ac_get_arg(&ctx->ac, ctx->args->vb_descriptors[input_index]);
-   } else {
-      unsigned index = input_index - num_vbos_in_user_sgprs;
-      vb_desc = ac_build_load_to_sgpr(
-         &ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->ac.vertex_buffers),
-         LLVMConstInt(ctx->ac.i32, index, 0));
-   }
-
-   if (ctx->abi.vertex_id_replaced) {
-      /* Only ngg culling will replace vertex_id, and ngg culling is an optimization key
-       * field, so the shader must be monolithic.
-       */
-      assert(ctx->shader->is_monolithic);
-      assert(ctx->abi.instance_id_replaced);
-
-      vertex_index = get_vertex_index(ctx, &ctx->shader->key.ge.part.vs.prolog,
-                                      input_index, ctx->instance_divisor_constbuf,
-                                      ctx->args->ac.start_instance.arg_index,
-                                      ctx->args->ac.base_vertex.arg_index);
-   } else {
-      vertex_index = LLVMGetParam(ctx->main_fn.value,
-                                  ctx->args->vertex_index0.arg_index + input_index);
-   }
-
-   /* Use the open-coded implementation for all loads of doubles and
-    * of dword-sized data that needs fixups. We need to insert conversion
-    * code anyway, and the amd/common code does it for us.
-    */
-   bool opencode = ctx->shader->key.ge.mono.vs_fetch_opencode & (1 << input_index);
-   fix_fetch.bits = ctx->shader->key.ge.mono.vs_fix_fetch[input_index].bits;
-   if (opencode || (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) ||
-       (fix_fetch.u.log_size == 2)) {
-      tmp = ac_build_opencoded_load_format(&ctx->ac, fix_fetch.u.log_size,
-                                           fix_fetch.u.num_channels_m1 + 1, fix_fetch.u.format,
-                                           fix_fetch.u.reverse, !opencode, vb_desc, vertex_index,
-                                           ctx->ac.i32_0, ctx->ac.i32_0, 0, can_speculate);
-      for (unsigned i = 0; i < 4; ++i)
-         out[i] =
-            LLVMBuildExtractElement(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, i, false), "");
-
-      if (bit_size == 16) {
-         if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT ||
-             fix_fetch.u.format == AC_FETCH_FORMAT_SINT) {
-            for (unsigned i = 0; i < 4; i++)
-               out[i] = LLVMBuildTrunc(ctx->ac.builder, out[i], ctx->ac.i16, "");
-         } else {
-            for (unsigned i = 0; i < 4; i++) {
-               out[i] = ac_to_float(&ctx->ac, out[i]);
-               out[i] = LLVMBuildFPTrunc(ctx->ac.builder, out[i], ctx->ac.f16, "");
-            }
-         }
-      }
-      return;
-   }
-
-   unsigned required_channels = util_last_bit(info->input[input_index].usage_mask);
-   if (required_channels == 0) {
-      for (unsigned i = 0; i < 4; ++i)
-         out[i] = LLVMGetUndef(ctx->ac.f32);
-      return;
-   }
-
-   /* Do multiple loads for special formats. */
-   LLVMValueRef fetches[4];
-   unsigned num_fetches;
-   unsigned fetch_stride;
-   unsigned channels_per_fetch;
-
-   if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2) {
-      num_fetches = MIN2(required_channels, 3);
-      fetch_stride = 1 << fix_fetch.u.log_size;
-      channels_per_fetch = 1;
-   } else {
-      num_fetches = 1;
-      fetch_stride = 0;
-      channels_per_fetch = required_channels;
-   }
-
-   for (unsigned i = 0; i < num_fetches; ++i) {
-      LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0);
-      fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset,
-                                               channels_per_fetch, 0, can_speculate,
-                                               bit_size == 16, false);
-   }
-
-   if (num_fetches == 1 && channels_per_fetch > 1) {
-      LLVMValueRef fetch = fetches[0];
-      for (unsigned i = 0; i < channels_per_fetch; ++i) {
-         tmp = LLVMConstInt(ctx->ac.i32, i, false);
-         fetches[i] = LLVMBuildExtractElement(ctx->ac.builder, fetch, tmp, "");
-      }
-      num_fetches = channels_per_fetch;
-      channels_per_fetch = 1;
-   }
-
-   for (unsigned i = num_fetches; i < 4; ++i)
-      fetches[i] = LLVMGetUndef(float_type);
-
-   if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 && required_channels == 4) {
-      if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT)
-         fetches[3] = LLVMConstInt(int_type, 1, 0);
-      else
-         fetches[3] = LLVMConstReal(float_type, 1);
-   } else if (fix_fetch.u.log_size == 3 &&
-              (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ||
-               fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED ||
-               fix_fetch.u.format == AC_FETCH_FORMAT_SINT) &&
-              required_channels == 4) {
-
-      /* For 2_10_10_10, the hardware returns an unsigned value;
-       * convert it to a signed one.
-       */
-      LLVMValueRef tmp = fetches[3];
-      LLVMValueRef c30 = LLVMConstInt(int_type, 30, 0);
-
-      /* First, recover the sign-extended signed integer value. */
-      if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED)
-         tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, int_type, "");
-      else
-         tmp = ac_to_integer(&ctx->ac, tmp);
-
-      /* For the integer-like cases, do a natural sign extension.
-       *
-       * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
-       * and happen to contain 0, 1, 2, 3 as the two LSBs of the
-       * exponent.
-       */
-      tmp = LLVMBuildShl(
-         ctx->ac.builder, tmp,
-         fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(int_type, 7, 0) : c30, "");
-      tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, "");
-
-      /* Convert back to the right type. */
-      if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) {
-         LLVMValueRef clamp;
-         LLVMValueRef neg_one = LLVMConstReal(float_type, -1.0);
-         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, "");
-         clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, "");
-         tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, "");
-      } else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) {
-         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, "");
-      }
-
-      fetches[3] = tmp;
-   }
-
-   for (unsigned i = 0; i < 4; ++i)
-      out[i] = ac_to_float(&ctx->ac, fetches[i]);
-}
-
-static LLVMValueRef si_load_vs_input(struct ac_shader_abi *abi, unsigned driver_location,
-                                     unsigned component, unsigned num_components,
-                                     unsigned vertex_index, LLVMTypeRef type)
-{
-   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-   LLVMValueRef values[4];
-
-   load_input_vs(ctx, driver_location, values);
-
-   for (unsigned i = 0; i < 4; i++)
-      values[i] = LLVMBuildBitCast(ctx->ac.builder, values[i], type, "");
-
-   return ac_build_varying_gather_values(&ctx->ac, values, num_components, component);
-}
-
 /**
  * Build the vertex shader prolog function.
  *
@@ -463,8 +208,3 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
 
    si_llvm_build_ret(ctx, ret);
 }
-
-void si_llvm_init_vs_callbacks(struct si_shader_context *ctx)
-{
-   ctx->abi.load_inputs = si_load_vs_input;
-}
-- 
2.7.4