From 022e55557b8dee8cc23809551d76c95384c35352 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Timur=20Krist=C3=B3f?=
Date: Thu, 2 Feb 2023 10:47:58 +0100
Subject: [PATCH] nir: Add load_typed_buffer_amd intrinsic.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This new intrinsic maps to the MTBUF instruction format
on AMD GPUs and represents a typed buffer load in NIR.

Also add an unsigned upper bound for the new intrinsic.
Code for that ported from aco_instruction_selection_setup.

Signed-off-by: Timur Kristóf
Reviewed-by: Konstantin Seurer
Reviewed-by: Marek Olšák
Reviewed-by: Rhys Perry
Part-of:
---
 src/compiler/nir/nir_divergence_analysis.c |  1 +
 src/compiler/nir/nir_intrinsics.py         | 13 +++++++++++++
 src/compiler/nir/nir_range_analysis.c      | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index 78087e9..a8ca2ed 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -407,6 +407,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
    case nir_intrinsic_load_kernel_input:
    case nir_intrinsic_load_task_payload:
    case nir_intrinsic_load_buffer_amd:
+   case nir_intrinsic_load_typed_buffer_amd:
    case nir_intrinsic_image_samples:
    case nir_intrinsic_image_deref_samples:
    case nir_intrinsic_bindless_image_samples:
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 29ba0c7..f72cdd1 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -182,6 +182,7 @@ index("enum glsl_sampler_dim", "image_dim")
 index("bool", "image_array")
 
 # Image format for image intrinsics
+# Vertex buffer format for load_typed_buffer_amd
 index("enum pipe_format", "format")
 
 # Access qualifiers for image and memory access intrinsics. ACCESS_RESTRICT is
@@ -1331,6 +1332,18 @@ intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE,
 # src[] = { store value, descriptor, vector byte offset, scalar byte offset, index offset }
 intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, MEMORY_MODES, ACCESS])
 
+# Typed buffer load of arbitrary length, using a specified format.
+# src[] = { descriptor, vector byte offset, scalar byte offset, index offset }
+#
+# The compiler backend is responsible for emitting correct HW instructions according to alignment, range etc.
+# Users of this intrinsic must ensure that the first component being loaded is really the first component
+# of the specified format, because range analysis assumes this.
+# The size of the specified format also determines the memory range that this instruction is allowed to access.
+#
+# The index offset is multiplied by the stride in the descriptor, if any.
+# The vector/scalar offsets are in bytes, BASE is a constant byte offset.
+intrinsic("load_typed_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, MEMORY_MODES, ACCESS, FORMAT, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
+
 # src[] = { address, unsigned 32-bit offset }.
 load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
 # src[] = { value, address, unsigned 32-bit offset }.
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 57d40aa..f4f6c0a 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -25,6 +25,7 @@
 #include "nir.h"
 #include "nir_range_analysis.h"
 #include "util/hash_table.h"
+#include "util/u_math.h"
 
 /**
  * Analyzes a sequence of operations to determine some aspects of the range of
@@ -1469,6 +1470,24 @@ nir_unsigned_upper_bound_impl(nir_shader *shader, struct hash_table *range_ht,
          /* Very generous maximum: TCS/TES executed by largest possible workgroup */
          res = config->max_workgroup_invocations / MAX2(shader->info.tess.tcs_vertices_out, 1u);
          break;
+      case nir_intrinsic_load_typed_buffer_amd: {
+         const enum pipe_format format = nir_intrinsic_format(intrin);
+         if (format == PIPE_FORMAT_NONE)
+            break;
+
+         const struct util_format_description* desc = util_format_description(format);
+         if (desc->channel[scalar.comp].type != UTIL_FORMAT_TYPE_UNSIGNED)
+            break;
+
+         if (desc->channel[scalar.comp].normalized) {
+            res = fui(1.0);
+            break;
+         }
+
+         const uint32_t chan_max = u_uintN_max(desc->channel[scalar.comp].size);
+         res = desc->channel[scalar.comp].pure_integer ? chan_max : fui(chan_max);
+         break;
+      }
       case nir_intrinsic_load_scalar_arg_amd:
       case nir_intrinsic_load_vector_arg_amd: {
          uint32_t upper_bound = nir_intrinsic_arg_upper_bound_u32_amd(intrin);
-- 
2.7.4