--- /dev/null
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/* This lowers image and texture opcodes to typed buffer opcodes (equivalent to image buffers)
+ * for some CDNA chips. Sampler buffers and image buffers are not lowered.
+ *
+ * Only the subset of opcodes and states that is used by VAAPI and OpenMAX is lowered.
+ * That means CLAMP_TO_EDGE is always used. Only level 0 can be accessed. The minification
+ * and magnification filter settings are assumed to be equal.
+ *
+ * The pass relies on a custom 8-dword image descriptor created for use with this pass.
+ * The first 4 dwords contain a buffer descriptor whose format matches the image format
+ * and whose stride matches the pixel size; the last 4 dwords contain parameters for
+ * manual address computation and bounds checking, such as the pitch and the number of
+ * elements per slice.
+ */
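+
+/* Layout of the custom descriptor as consumed by the helpers in this file
+ * (summarized from the field accesses below):
+ *    dwords 0-3: typed buffer descriptor
+ *    dword 4:    bits [15:0] width, bits [31:16] height
+ *    dword 5:    bits [15:0] depth, bits [31:16] first array layer
+ *    dword 6:    pitch in elements
+ *    dword 7:    elements per slice
+ */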
+
+#include "ac_nir.h"
+#include "nir_builder.h"
+#include "amdgfxregs.h"
+
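+/* Extract a bitfield from one dword of the descriptor, e.g. get_field(b, desc, 4,
+ * 0xffff0000) returns bits [31:16] of dword 4, which hold the image height.
+ */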
+static nir_ssa_def *get_field(nir_builder *b, nir_ssa_def *desc, unsigned index, unsigned mask)
+{
+ return nir_ubfe_imm(b, nir_channel(b, desc, index), ffs(mask) - 1, util_bitcount(mask));
+}
+
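+/* Number of coordinate components used by the given image dimension, e.g. a 2D
+ * array takes (x, y, layer) = 3 components.
+ */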
+static unsigned get_coord_components(enum glsl_sampler_dim dim, bool is_array)
+{
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ return is_array ? 2 : 1;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ return is_array ? 3 : 2;
+ case GLSL_SAMPLER_DIM_3D:
+ return 3;
+ default:
+ unreachable("unexpected sampler type");
+ }
+}
+
+/* Lower image coordinates to a buffer element index. If handle_out_of_bounds is set,
+ * out-of-bounds coordinates yield the element index UINT_MAX instead.
+ */
+static nir_ssa_def *lower_image_coords(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *coord,
+ enum glsl_sampler_dim dim, bool is_array,
+ bool handle_out_of_bounds)
+{
+ unsigned num_coord_components = get_coord_components(dim, is_array);
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+
+ /* Get coordinates. */
+ nir_ssa_def *x = nir_channel(b, coord, 0);
+ nir_ssa_def *y = num_coord_components >= 2 ? nir_channel(b, coord, 1) : NULL;
+ nir_ssa_def *z = num_coord_components >= 3 ? nir_channel(b, coord, 2) : NULL;
+
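+   /* For 1D arrays, the layer index comes in as the Y channel; move it to Z so
+    * that the array handling below is the same for all dimensions.
+    */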
+ if (dim == GLSL_SAMPLER_DIM_1D && is_array) {
+ z = y;
+ y = NULL;
+ }
+
+ if (is_array) {
+ nir_ssa_def *first_layer = get_field(b, desc, 5, 0xffff0000);
+ z = nir_iadd(b, z, first_layer);
+ }
+
+ /* Compute the buffer element index. */
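+   /* index = x + y * pitch + z * slice_elements, where an array layer acts as z. */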
+ nir_ssa_def *index = x;
+ if (y) {
+ nir_ssa_def *pitch = nir_channel(b, desc, 6);
+ index = nir_iadd(b, index, nir_imul(b, pitch, y));
+ }
+   if (z) {
+      nir_ssa_def *slice_elements = nir_channel(b, desc, 7);
+      index = nir_iadd(b, index, nir_imul(b, slice_elements, z));
+   }
+
+ /* Determine whether the coordinates are out of bounds. */
+ nir_ssa_def *out_of_bounds = NULL;
+
+ if (handle_out_of_bounds) {
+ nir_ssa_def *width = get_field(b, desc, 4, 0xffff);
+ out_of_bounds = nir_ior(b, nir_ilt(b, x, zero), nir_ige(b, x, width));
+
+ if (y) {
+ nir_ssa_def *height = get_field(b, desc, 4, 0xffff0000);
+ out_of_bounds = nir_ior(b, out_of_bounds,
+ nir_ior(b, nir_ilt(b, y, zero), nir_ige(b, y, height)));
+ }
+ if (z) {
+ nir_ssa_def *depth = get_field(b, desc, 5, 0xffff);
+ out_of_bounds = nir_ior(b, out_of_bounds,
+ nir_ior(b, nir_ilt(b, z, zero), nir_ige(b, z, depth)));
+ }
+
+      /* Make the buffer opcode out of bounds by setting the element index to UINT_MAX,
+       * which the hardware buffer bounds check rejects. */
+ index = nir_bcsel(b, out_of_bounds, nir_imm_int(b, UINT_MAX), index);
+ }
+
+ return index;
+}
+
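+/* Load from the emulated image by lowering the coordinates to a buffer element index
+ * and loading through the typed buffer descriptor in the first 4 dwords.
+ */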
+static nir_ssa_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size,
+ nir_ssa_def *desc, nir_ssa_def *coord,
+ enum gl_access_qualifier access, enum glsl_sampler_dim dim,
+ bool is_array, bool handle_out_of_bounds)
+{
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+
+ return nir_load_buffer_amd(b, num_components, bit_size, nir_channels(b, desc, 0xf),
+ zero, zero,
+ lower_image_coords(b, desc, coord, dim, is_array,
+ handle_out_of_bounds),
+ .base = 0,
+ .memory_modes = nir_var_image,
+ .access = access | ACCESS_USES_FORMAT_AMD);
+}
+
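+/* Store to the emulated image. Out-of-bounds coordinates are always redirected to
+ * element index UINT_MAX, which the buffer bounds check rejects.
+ */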
+static void emulated_image_store(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *coord,
+ nir_ssa_def *data, enum gl_access_qualifier access,
+ enum glsl_sampler_dim dim, bool is_array)
+{
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+
+ nir_store_buffer_amd(b, data, nir_channels(b, desc, 0xf), zero, zero,
+ lower_image_coords(b, desc, coord, dim, is_array, true),
+ .base = 0,
+ .memory_modes = nir_var_image,
+ .access = access | ACCESS_USES_FORMAT_AMD);
+}
+
+/* Return the width, height, or depth for dim=0,1,2. */
+static nir_ssa_def *get_dim(nir_builder *b, nir_ssa_def *desc, unsigned dim)
+{
+ return get_field(b, desc, 4 + dim / 2, 0xffff << (16 * (dim % 2)));
+}
+
+/* Lower tex/txl with lod=0 to typed buffer loads, based on the filtering equations in
+ * the GL spec. This converts the tex opcode into 1 image load for nearest filtering, or
+ * 2 (1D), 4 (2D), or 8 (3D) weighted image loads for linear filtering.
+ */
+static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components,
+ unsigned bit_size, nir_ssa_def *desc,
+ nir_ssa_def *sampler_desc, nir_ssa_def *coord_vec,
+ enum glsl_sampler_dim sampler_dim, bool is_array)
+{
+ const enum gl_access_qualifier access =
+ ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER;
+ const unsigned num_coord_components = get_coord_components(sampler_dim, is_array);
+ const unsigned num_dim_coords = num_coord_components - is_array;
+ const unsigned array_comp = num_coord_components - 1;
+
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+ nir_ssa_def *fp_one = nir_imm_floatN_t(b, 1, bit_size);
+ nir_ssa_def *coord[3] = {0};
+
+ assert(num_coord_components <= 3);
+ for (unsigned i = 0; i < num_coord_components; i++)
+ coord[i] = nir_channel(b, coord_vec, i);
+
+   /* Convert normalized coordinates to unnormalized (texel-space) coordinates.
+    * Rect samplers are unnormalized already. */
+ if (sampler_dim != GLSL_SAMPLER_DIM_RECT) {
+ for (unsigned dim = 0; dim < num_dim_coords; dim++)
+ coord[dim] = nir_fmul(b, coord[dim], nir_u2f32(b, get_dim(b, desc, dim)));
+ }
+
+ /* The layer index is handled differently and ignores the filter and wrap mode. */
+ if (is_array) {
+ coord[array_comp] = nir_f2i32(b, nir_fround_even(b, coord[array_comp]));
+ coord[array_comp] = nir_iclamp(b, coord[array_comp], zero,
+ nir_iadd_imm(b, get_dim(b, desc, 2), -1));
+ }
+
+ /* Determine the filter by reading the first bit of the XY_MAG_FILTER field,
+ * which is 1 for linear, 0 for nearest.
+ *
+ * We assume that XY_MIN_FILTER and Z_FILTER are identical.
+ */
+ nir_ssa_def *is_nearest =
+ nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, sampler_desc, 2), 1 << 20), 0);
+ nir_ssa_def *result_nearest, *result_linear;
+
+ nir_if *if_nearest = nir_push_if(b, is_nearest);
+ {
+ /* Nearest filter. */
+ nir_ssa_def *coord0[3] = {0};
+ memcpy(coord0, coord, sizeof(coord));
+
+ for (unsigned dim = 0; dim < num_dim_coords; dim++) {
+ /* Convert to integer coordinates. (floor is required) */
+ coord0[dim] = nir_f2i32(b, nir_ffloor(b, coord0[dim]));
+
+ /* Apply the wrap mode. We assume it's always CLAMP_TO_EDGE, so clamp. */
+ coord0[dim] = nir_iclamp(b, coord0[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
+ }
+
+ /* Load the texel. */
+ result_nearest = emulated_image_load(b, num_components, bit_size, desc,
+ nir_vec(b, coord0, num_coord_components),
+ access, sampler_dim, is_array, false);
+ }
+ nir_push_else(b, if_nearest);
+ {
+ /* Linear filter. */
+ nir_ssa_def *coord0[3] = {0};
+ nir_ssa_def *coord1[3] = {0};
+ nir_ssa_def *weight[3] = {0};
+
+ memcpy(coord0, coord, sizeof(coord));
+
+ for (unsigned dim = 0; dim < num_dim_coords; dim++) {
+ /* First subtract 0.5. */
+ coord0[dim] = nir_fadd_imm(b, coord0[dim], -0.5);
+
+ /* Use fract to compute the filter weights. (FP16 results will get FP16 filter precision) */
+ weight[dim] = nir_f2fN(b, nir_ffract(b, coord0[dim]), bit_size);
+
+ /* Floor to get the top-left texel of the filter. */
+ /* Add 1 to get the bottom-right texel. */
+ coord0[dim] = nir_f2i32(b, nir_ffloor(b, coord0[dim]));
+ coord1[dim] = nir_iadd_imm(b, coord0[dim], 1);
+
+ /* Apply the wrap mode. We assume it's always CLAMP_TO_EDGE, so clamp. */
+ coord0[dim] = nir_iclamp(b, coord0[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
+ coord1[dim] = nir_iclamp(b, coord1[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
+ }
+
+ /* Load all texels for the linear filter.
+ * This is 2 texels for 1D, 4 texels for 2D, and 8 texels for 3D.
+ */
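+      /* For 2D, the loop below computes:
+       *    result = (1-wx)*(1-wy)*texel(x0,y0) + wx*(1-wy)*texel(x1,y0) +
+       *             (1-wx)*wy*texel(x0,y1)     + wx*wy*texel(x1,y1)
+       */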
+ nir_ssa_def *texel[8];
+
+ for (unsigned i = 0; i < (1 << num_dim_coords); i++) {
+ nir_ssa_def *texel_coord[3];
+
+ /* Determine whether the current texel should use channels from coord0
+ * or coord1. The i-th bit of the texel index determines that.
+ */
+ for (unsigned dim = 0; dim < num_dim_coords; dim++)
+ texel_coord[dim] = (i >> dim) & 0x1 ? coord1[dim] : coord0[dim];
+
+ /* Add the layer index, which doesn't change between texels. */
+ if (is_array)
+ texel_coord[array_comp] = coord0[array_comp];
+
+ /* Compute how much the texel contributes to the final result. */
+ nir_ssa_def *texel_weight = fp_one;
+ for (unsigned dim = 0; dim < num_dim_coords; dim++) {
+ /* Let's see what "i" represents:
+ * Texel i=0 = 000
+ * Texel i=1 = 001
+ * Texel i=2 = 010 (2D & 3D only)
+ * Texel i=3 = 011 (2D & 3D only)
+ * Texel i=4 = 100 (3D only)
+ * Texel i=5 = 101 (3D only)
+ * Texel i=6 = 110 (3D only)
+ * Texel i=7 = 111 (3D only)
+ *
+             * The rightmost bit (LSB) represents the X direction, the middle bit represents
+             * the Y direction, and the leftmost bit (MSB) represents the Z direction.
+             * Shifting the texel index "i" right by "dim" and testing bit 0 tells us whether
+             * this texel uses coord1 (multiply by weight[dim]) or coord0 (multiply by
+             * 1 - weight[dim]).
+             */
+ texel_weight = nir_fmul(b, texel_weight,
+ (i >> dim) & 0x1 ? weight[dim] :
+ nir_fadd(b, fp_one, nir_fneg(b, weight[dim])));
+ }
+
+ /* Load the linear filter texel. */
+ texel[i] = emulated_image_load(b, num_components, bit_size, desc,
+ nir_vec(b, texel_coord, num_coord_components),
+ access, sampler_dim, is_array, false);
+
+ /* Multiply the texel by the weight. */
+ texel[i] = nir_fmul(b, texel[i], texel_weight);
+ }
+
+      /* Sum up all weighted texels to get the final result of linear filtering. Start
+       * from a zero constant of the result's bit size so FP16 results stay FP16. */
+      result_linear = nir_imm_floatN_t(b, 0, bit_size);
+ for (unsigned i = 0; i < (1 << num_dim_coords); i++)
+ result_linear = nir_fadd(b, result_linear, texel[i]);
+ }
+ nir_pop_if(b, if_nearest);
+
+ return nir_if_phi(b, result_nearest, result_linear);
+}
+
+static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
+{
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ nir_deref_instr *deref;
+ enum gl_access_qualifier access;
+ enum glsl_sampler_dim dim;
+ bool is_array;
+ nir_ssa_def *desc = NULL, *result = NULL;
+ ASSERTED const char *intr_name;
+
+ nir_ssa_def *dst = &intr->dest.ssa;
+ b->cursor = nir_before_instr(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ access = nir_intrinsic_access(intr);
+ dim = nir_intrinsic_image_dim(intr);
+ if (dim == GLSL_SAMPLER_DIM_BUF)
+ return false;
+ is_array = nir_intrinsic_image_array(intr);
+ desc = nir_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
+ 32, intr->src[0].ssa);
+ break;
+
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_store:
+ deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+ access = nir_deref_instr_get_variable(deref)->data.access;
+ dim = glsl_get_sampler_dim(deref->type);
+ if (dim == GLSL_SAMPLER_DIM_BUF)
+ return false;
+ is_array = glsl_sampler_type_is_array(deref->type);
+ desc = nir_image_deref_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
+ 32, intr->src[0].ssa);
+ break;
+
+ case nir_intrinsic_bindless_image_load:
+ case nir_intrinsic_bindless_image_store:
+ access = nir_intrinsic_access(intr);
+ dim = nir_intrinsic_image_dim(intr);
+ if (dim == GLSL_SAMPLER_DIM_BUF)
+ return false;
+ is_array = nir_intrinsic_image_array(intr);
+ desc = nir_bindless_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
+ 32, intr->src[0].ssa);
+ break;
+
+ default:
+ intr_name = nir_intrinsic_infos[intr->intrinsic].name;
+
+ /* No other intrinsics are expected from VAAPI and OpenMAX.
+ * (this lowering is only used by CDNA, which only uses those frontends)
+ */
+ if (strstr(intr_name, "image") == intr_name ||
+ strstr(intr_name, "bindless_image") == intr_name) {
+ fprintf(stderr, "Unexpected image opcode: ");
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "\nAborting to prevent a hang.");
+ abort();
+ }
+ return false;
+ }
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_bindless_image_load:
+ result = emulated_image_load(b, intr->dest.ssa.num_components, intr->dest.ssa.bit_size,
+ desc, intr->src[1].ssa, access, dim, is_array, true);
+ nir_ssa_def_rewrite_uses_after(dst, result, instr);
+ nir_instr_remove(instr);
+ return true;
+
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_bindless_image_store:
+ emulated_image_store(b, desc, intr->src[1].ssa, intr->src[3].ssa, access, dim, is_array);
+ nir_instr_remove(instr);
+ return true;
+
+ default:
+ unreachable("shouldn't get here");
+ }
+ } else if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ nir_tex_instr *new_tex;
+ nir_ssa_def *coord = NULL, *desc = NULL, *sampler_desc = NULL, *result = NULL;
+
+ nir_ssa_def *dst = &tex->dest.ssa;
+ b->cursor = nir_before_instr(instr);
+
+ switch (tex->op) {
+ case nir_texop_tex:
+ case nir_texop_txl:
+ case nir_texop_txf:
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_texture_deref:
+ case nir_tex_src_texture_handle:
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ return false;
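+            /* Load the 8-dword emulation descriptor with a dedicated
+             * nir_texop_descriptor_amd instruction that reads the original
+             * texture deref/handle.
+             */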
+ new_tex = nir_tex_instr_create(b->shader, 1);
+ new_tex->op = nir_texop_descriptor_amd;
+ new_tex->sampler_dim = tex->sampler_dim;
+ new_tex->is_array = tex->is_array;
+ new_tex->texture_index = tex->texture_index;
+ new_tex->sampler_index = tex->sampler_index;
+ new_tex->dest_type = nir_type_int32;
+ nir_src_copy(&new_tex->src[0].src, &tex->src[i].src, &new_tex->instr);
+ new_tex->src[0].src_type = tex->src[i].src_type;
+ nir_ssa_dest_init(&new_tex->instr, &new_tex->dest,
+ nir_tex_instr_dest_size(new_tex), 32, NULL);
+ nir_builder_instr_insert(b, &new_tex->instr);
+ desc = &new_tex->dest.ssa;
+ break;
+
+ case nir_tex_src_sampler_deref:
+ case nir_tex_src_sampler_handle:
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ return false;
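+            /* Load the sampler descriptor as well; emulated_tex_level_zero reads
+             * the filter bit from its dword 2.
+             */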
+ new_tex = nir_tex_instr_create(b->shader, 1);
+ new_tex->op = nir_texop_sampler_descriptor_amd;
+ new_tex->sampler_dim = tex->sampler_dim;
+ new_tex->is_array = tex->is_array;
+ new_tex->texture_index = tex->texture_index;
+ new_tex->sampler_index = tex->sampler_index;
+ new_tex->dest_type = nir_type_int32;
+ nir_src_copy(&new_tex->src[0].src, &tex->src[i].src, &new_tex->instr);
+ new_tex->src[0].src_type = tex->src[i].src_type;
+ nir_ssa_dest_init(&new_tex->instr, &new_tex->dest,
+ nir_tex_instr_dest_size(new_tex), 32, NULL);
+ nir_builder_instr_insert(b, &new_tex->instr);
+ sampler_desc = &new_tex->dest.ssa;
+ break;
+
+ case nir_tex_src_coord:
+ coord = tex->src[i].src.ssa;
+ break;
+
+ case nir_tex_src_projector:
+ case nir_tex_src_comparator:
+ case nir_tex_src_offset:
+ case nir_tex_src_texture_offset:
+ case nir_tex_src_sampler_offset:
+ case nir_tex_src_plane:
+ unreachable("unsupported texture src");
+
+ default:;
+ }
+ }
+
+ switch (tex->op) {
+ case nir_texop_txf:
+ result = emulated_image_load(b, tex->dest.ssa.num_components, tex->dest.ssa.bit_size,
+ desc, coord,
+ ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
+ tex->sampler_dim, tex->is_array, true);
+ nir_ssa_def_rewrite_uses_after(dst, result, instr);
+ nir_instr_remove(instr);
+ return true;
+
+ case nir_texop_tex:
+ case nir_texop_txl:
+ result = emulated_tex_level_zero(b, tex->dest.ssa.num_components, tex->dest.ssa.bit_size,
+ desc, sampler_desc, coord, tex->sampler_dim, tex->is_array);
+ nir_ssa_def_rewrite_uses_after(dst, result, instr);
+ nir_instr_remove(instr);
+ return true;
+
+ default:
+ unreachable("shouldn't get here");
+ }
+ break;
+
+ case nir_texop_descriptor_amd:
+ case nir_texop_sampler_descriptor_amd:
+ return false;
+
+ default:
+ fprintf(stderr, "Unexpected texture opcode: ");
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "\nAborting to prevent a hang.");
+ abort();
+ }
+ }
+
+ return false;
+}
+
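+/* Shader pass entry point. A typical (illustrative) driver call site would be:
+ *    NIR_PASS_V(nir, ac_nir_lower_image_opcodes);
+ */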
+bool ac_nir_lower_image_opcodes(nir_shader *nir)
+{
+ return nir_shader_instructions_pass(nir, lower_image_opcodes,
+ nir_metadata_dominance |
+ nir_metadata_block_index,
+ NULL);
+}