nir: Add load_typed_buffer_amd intrinsic.

author Timur Kristóf <timur.kristof@gmail.com>
Thu, 2 Feb 2023 09:47:58 +0000 (10:47 +0100)
committer Marge Bot <emma+marge@anholt.net>
Wed, 15 Mar 2023 14:54:27 +0000 (14:54 +0000)
diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c

index 78087e9..a8ca2ed 100644 (file)
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -407,6 +407,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
     case nir_intrinsic_load_kernel_input:
     case nir_intrinsic_load_task_payload:
     case nir_intrinsic_load_buffer_amd:
+   case nir_intrinsic_load_typed_buffer_amd:
     case nir_intrinsic_image_samples:
     case nir_intrinsic_image_deref_samples:
     case nir_intrinsic_bindless_image_samples:
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py

index 29ba0c7..f72cdd1 100644 (file)
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -182,6 +182,7 @@ index("enum glsl_sampler_dim", "image_dim")
  index("bool", "image_array")
  
  # Image format for image intrinsics
+# Vertex buffer format for load_typed_buffer_amd
  index("enum pipe_format", "format")
  
  # Access qualifiers for image and memory access intrinsics. ACCESS_RESTRICT is
@@ -1331,6 +1332,18 @@ intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE,
  # src[] = { store value, descriptor, vector byte offset, scalar byte offset, index offset }
  intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, MEMORY_MODES, ACCESS])
  
+# Typed buffer load of arbitrary length, using a specified format.
+# src[] = { descriptor, vector byte offset, scalar byte offset, index offset }
+#
+# The compiler backend is responsible for emitting correct HW instructions according to alignment, range etc.
+# Users of this intrinsic must ensure that the first component being loaded is really the first component
+# of the specified format, because range analysis assumes this.
+# The size of the specified format also determines the memory range that this instruction is allowed to access.
+#
+# The index offset is multiplied by the stride in the descriptor, if any.
+# The vector/scalar offsets are in bytes, BASE is a constant byte offset.
+intrinsic("load_typed_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, MEMORY_MODES, ACCESS, FORMAT, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
+
  # src[] = { address, unsigned 32-bit offset }.
  load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
  # src[] = { value, address, unsigned 32-bit offset }.
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c

index 57d40aa..f4f6c0a 100644 (file)
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -25,6 +25,7 @@
  #include "nir.h"
  #include "nir_range_analysis.h"
  #include "util/hash_table.h"
+#include "util/u_math.h"
  
  /**
   * Analyzes a sequence of operations to determine some aspects of the range of
@@ -1469,6 +1470,24 @@ nir_unsigned_upper_bound_impl(nir_shader *shader, struct hash_table *range_ht,
           /* Very generous maximum: TCS/TES executed by largest possible workgroup */
           res = config->max_workgroup_invocations / MAX2(shader->info.tess.tcs_vertices_out, 1u);
           break;
+      case nir_intrinsic_load_typed_buffer_amd: {
+         const enum pipe_format format = nir_intrinsic_format(intrin);
+         if (format == PIPE_FORMAT_NONE)
+            break;
+
+         const struct util_format_description* desc = util_format_description(format);
+         if (desc->channel[scalar.comp].type != UTIL_FORMAT_TYPE_UNSIGNED)
+            break;
+
+         if (desc->channel[scalar.comp].normalized) {
+            res = fui(1.0);
+            break;
+         }
+
+         const uint32_t chan_max = u_uintN_max(desc->channel[scalar.comp].size);
+         res = desc->channel[scalar.comp].pure_integer ? chan_max : fui(chan_max);
+         break;
+      }
        case nir_intrinsic_load_scalar_arg_amd:
        case nir_intrinsic_load_vector_arg_amd: {
           uint32_t upper_bound = nir_intrinsic_arg_upper_bound_u32_amd(intrin);
Files changed:
src/compiler/nir/nir_divergence_analysis.c
src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_range_analysis.c