intel/nir: add lower for sparse images & textures
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 27 Jun 2023 14:54:05 +0000 (17:54 +0300)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 26 Jul 2023 23:02:59 +0000 (02:02 +0300)
We have to lower sparse image loads into a regular image load + a sampler
operation that provides the residency information.
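
For illustration (hand-written, NIR-like pseudocode rather than actual
compiler output):

   vec5 ssa_3 = image_sparse_load ssa_0 (image), ssa_1 (coord), ...

becomes

   vec4 ssa_4 = image_load ssa_0 (image), ssa_1 (coord), ...
   vec5 ssa_5 = (sparse) txf ssa_0 (texture), ssa_1 (coord), 0 (lod)
   vec5 ssa_6 = vec5 ssa_4.x, ssa_4.y, ssa_4.z, ssa_4.w, ssa_5.e

with all uses of ssa_3 rewritten to point at ssa_6.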

There is also a restriction on sampler accesses with a compare value; lower
those into 2 sampler instructions to meet the restriction.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23882>

src/intel/compiler/brw_nir.c
src/intel/compiler/brw_nir.h
src/intel/compiler/brw_nir_lower_sparse.c [new file with mode: 0644]
src/intel/compiler/meson.build

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index e8100d1..3409b47 100644
@@ -1561,6 +1561,8 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 
    UNUSED bool progress; /* Written by OPT */
 
+   OPT(brw_nir_lower_sparse_intrinsics);
+
    OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
 
    OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 52b5538..2a109d8 100644
@@ -191,6 +191,8 @@ bool brw_nir_lower_conversions(nir_shader *nir);
 
 bool brw_nir_lower_shading_rate_output(nir_shader *nir);
 
+bool brw_nir_lower_sparse_intrinsics(nir_shader *nir);
+
 struct brw_nir_lower_storage_image_opts {
    const struct intel_device_info *devinfo;
 
diff --git a/src/intel/compiler/brw_nir_lower_sparse.c b/src/intel/compiler/brw_nir_lower_sparse.c
new file mode 100644
index 0000000..8976762
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_sparse.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "compiler/nir/nir_builder.h"
+
+/*
+ * This pass lowers a few of the sparse instructions to something HW can
+ * handle.
+ *
+ * Each image_*_sparse_load intrinsic is lowered into 2 instructions: a
+ * regular image_*_load intrinsic and a sparse texture txf operation. The
+ * sparse vector of the original intrinsic is then reconstructed from the 2
+ * new values. We need to do this because our backend implements image
+ * load/store through the dataport, and the dataport unit doesn't provide
+ * residency information; we need to use the sampler for residency.
+ *
+ * The is_sparse_texels_resident intrinsic is lowered to a bit-checking
+ * operation, as the data reported by the sampler is a single bit per lane
+ * in the first component.
+ *
+ * The tex_* instructions with a compare value need to be lowered into 2
+ * instructions due to a HW limitation:
+ *
+ * SKL PRMs, Volume 7: 3D-Media-GPGPU, Messages, SIMD Payloads:
+ *
+ *    "The Pixel Null Mask field, when enabled via the Pixel Null Mask Enable
+ *     will be incorect for sample_c when applied to a surface with 64-bit per
+ *     texel format such as R16G16BA16_UNORM. Pixel Null mask Enable may
+ *     incorrectly report pixels as referencing a Null surface."
+ */
+
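+/*
+ * The residency code reported by the sampler packs one bit per lane into
+ * its first component, so for a given lane the texels are resident iff:
+ *
+ *    resident = (code & (1 << subgroup_invocation)) != 0
+ */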
+static void
+lower_is_sparse_texels_resident(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def_rewrite_uses(
+      &intrin->dest.ssa,
+      nir_i2b(b, nir_iand(b, intrin->src[0].ssa,
+                              nir_ishl(b, nir_imm_int(b, 1),
+                                          nir_load_subgroup_invocation(b)))));
+}
+
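+/*
+ * Residency codes from multiple sparse accesses are combined with a plain
+ * AND: a lane is only considered resident if every access found it
+ * resident.
+ */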
+static void
+lower_sparse_residency_code_and(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def_rewrite_uses(
+      &intrin->dest.ssa,
+      nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa));
+}
+
+static void
+lower_sparse_image_load(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def *img_load;
+   nir_intrinsic_instr *new_intrin;
+   if (intrin->intrinsic == nir_intrinsic_image_sparse_load) {
+      img_load = nir_image_load(b,
+                                intrin->num_components - 1,
+                                nir_dest_bit_size(intrin->dest),
+                                intrin->src[0].ssa,
+                                intrin->src[1].ssa,
+                                intrin->src[2].ssa,
+                                intrin->src[3].ssa);
+      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
+      nir_intrinsic_set_range_base(new_intrin, nir_intrinsic_range_base(intrin));
+   } else {
+      img_load = nir_bindless_image_load(b,
+                                         intrin->num_components - 1,
+                                         nir_dest_bit_size(intrin->dest),
+                                         intrin->src[0].ssa,
+                                         intrin->src[1].ssa,
+                                         intrin->src[2].ssa,
+                                         intrin->src[3].ssa);
+      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
+   }
+
+   nir_intrinsic_set_image_array(new_intrin, nir_intrinsic_image_array(intrin));
+   nir_intrinsic_set_image_dim(new_intrin, nir_intrinsic_image_dim(intrin));
+   nir_intrinsic_set_format(new_intrin, nir_intrinsic_format(intrin));
+   nir_intrinsic_set_access(new_intrin, nir_intrinsic_access(intrin));
+   nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin));
+
+   nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < intrin->num_components - 1; i++) {
+      dests[i] = nir_channel(b, img_load, i);
+   }
+
+   /* Use a texture instruction to compute residency */
+   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
+
+   tex->op = nir_texop_txf;
+   /* We don't care about the dest type since we're not using any of that
+    * data.
+    */
+   tex->dest_type = nir_type_float32;
+   tex->is_array = nir_intrinsic_image_array(intrin);
+   tex->is_shadow = false;
+   tex->sampler_index = 0;
+   tex->is_sparse = true;
+
+   tex->src[0].src_type = intrin->intrinsic == nir_intrinsic_image_sparse_load ?
+                          nir_tex_src_texture_offset :
+                          nir_tex_src_texture_handle;
+   tex->src[0].src = nir_src_for_ssa(intrin->src[0].ssa);
+
+   tex->coord_components = nir_image_intrinsic_coord_components(intrin);
+   nir_ssa_def *coord;
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
+       nir_intrinsic_image_array(intrin)) {
+      tex->coord_components++;
+
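+      /* The image coordinate carries a flat layer index (slice * 6 + face)
+       * in its third component, while the sampler expects separate face and
+       * slice coordinates: face = layer % 6, slice = layer / 6.
+       */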
+      nir_ssa_def *img_layer = nir_channel(b, intrin->src[1].ssa, 2);
+      nir_ssa_def *tex_slice = nir_idiv(b, img_layer, nir_imm_int(b, 6));
+      nir_ssa_def *tex_face =
+         nir_iadd(b, img_layer, nir_ineg(b, nir_imul_imm(b, tex_slice, 6)));
+      nir_ssa_def *comps[4] = {
+         nir_channel(b, intrin->src[1].ssa, 0),
+         nir_channel(b, intrin->src[1].ssa, 1),
+         tex_face,
+         tex_slice
+      };
+      coord = nir_vec(b, comps, 4);
+   } else {
+      coord = nir_channels(b, intrin->src[1].ssa,
+                           nir_component_mask(tex->coord_components));
+   }
+   tex->src[1].src_type = nir_tex_src_coord;
+   tex->src[1].src = nir_src_for_ssa(coord);
+
+   tex->src[2].src_type = nir_tex_src_lod;
+   tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));
+
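+   /* 4 texel data components + 1 residency component */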
+   nir_ssa_dest_init(&tex->instr, &tex->dest, 5,
+                     nir_dest_bit_size(intrin->dest));
+
+   nir_builder_instr_insert(b, &tex->instr);
+
+   dests[intrin->num_components - 1] = nir_channel(b, &tex->dest.ssa, 4);
+
+   nir_ssa_def_rewrite_uses(
+      &intrin->dest.ssa,
+      nir_vec(b, dests, intrin->num_components));
+}
+
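+/*
+ * Illustration (hand-written, not compiler output): a sparse compare fetch
+ * such as
+ *
+ *    vec5 = (sparse) sample_c(coord, comparator)
+ *
+ * is split into
+ *
+ *    vec4 = sample_c(coord, comparator)   <- compare result, residency off
+ *    vec5 = (sparse) sample(coord)        <- residency only, compare dropped
+ *
+ * with the data components taken from the first instruction and the
+ * residency code from the second.
+ */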
+static void
+lower_tex_compare(nir_builder *b, nir_tex_instr *tex, int compare_idx)
+{
+   b->cursor = nir_after_instr(&tex->instr);
+
+   /* Clone the original instruction */
+   nir_tex_instr *sparse_tex = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
+   nir_ssa_dest_init(&sparse_tex->instr, &sparse_tex->dest,
+                     tex->dest.ssa.num_components, tex->dest.ssa.bit_size);
+   nir_builder_instr_insert(b, &sparse_tex->instr);
+
+   /* Drop the compare source on the cloned instruction */
+   nir_tex_instr_remove_src(sparse_tex, compare_idx);
+
+   /* Drop the residency query on the original tex instruction */
+   tex->is_sparse = false;
+   tex->dest.ssa.num_components = tex->dest.ssa.num_components - 1;
+
+   nir_ssa_def *new_comps[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < tex->dest.ssa.num_components; i++)
+      new_comps[i] = nir_channel(b, &tex->dest.ssa, i);
+   new_comps[tex->dest.ssa.num_components] =
+      nir_channel(b, &sparse_tex->dest.ssa, tex->dest.ssa.num_components);
+
+   nir_ssa_def *new_vec = nir_vec(b, new_comps, sparse_tex->dest.ssa.num_components);
+
+   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, new_vec, new_vec->parent_instr);
+}
+
+static bool
+lower_sparse_intrinsics(nir_builder *b, nir_instr *instr, void *cb_data)
+{
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_image_sparse_load:
+      case nir_intrinsic_bindless_image_sparse_load:
+         lower_sparse_image_load(b, intrin);
+         return true;
+
+      case nir_intrinsic_is_sparse_texels_resident:
+         lower_is_sparse_texels_resident(b, intrin);
+         return true;
+
+      case nir_intrinsic_sparse_residency_code_and:
+         lower_sparse_residency_code_and(b, intrin);
+         return true;
+
+      default:
+         return false;
+      }
+   }
+
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      int comp_idx = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
+      if (comp_idx != -1 && tex->is_sparse) {
+         lower_tex_compare(b, tex, comp_idx);
+         return true;
+      }
+      return false;
+   }
+
+   default:
+      return false;
+   }
+}
+
+bool
+brw_nir_lower_sparse_intrinsics(nir_shader *nir)
+{
+   return nir_shader_instructions_pass(nir, lower_sparse_intrinsics,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       NULL);
+}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index adcbeab..6a07a70 100644
@@ -96,6 +96,7 @@ libintel_compiler_files = files(
   'brw_nir_lower_rt_intrinsics.c',
   'brw_nir_lower_shader_calls.c',
   'brw_nir_lower_shading_rate_output.c',
+  'brw_nir_lower_sparse.c',
   'brw_nir_lower_storage_image.c',
   'brw_nir_opt_peephole_ffma.c',
   'brw_nir_opt_peephole_imul32x16.c',