From 3f4bfecee6d639bbe17097c34131c39eaac6e696 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Mon, 1 Nov 2021 09:32:03 -0700 Subject: [PATCH] nir: Add some notes about const/uniform array access rules in GL. I was doing some RE on freedreno and we had some questions about when the hardware might need non-uniform or non-constant array access for various descriptor types, so let's leave some notes for the next person. Reviewed-by: Connor Abbott Part-of: --- src/compiler/nir/nir.h | 18 ++++++++++++++++-- src/compiler/nir/nir_intrinsics.py | 19 ++++++++++++++----- src/compiler/shader_enums.h | 27 ++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 839f76a..45f025c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1981,7 +1981,10 @@ typedef enum { * * This is added to nir_tex_instr::sampler_index. Unless * nir_tex_instr::sampler_non_uniform is set, this is guaranteed to be - * dynamically uniform. + * dynamically uniform. This should not be present until GLSL ES 3.20, GLSL + * 4.00, or ARB_gpu_shader5, because in ES 3.10 and GL 3.30 samplers said + * "When aggregated into arrays within a shader, samplers can only be indexed + * with a constant integral expression." */ nir_tex_src_sampler_offset, @@ -2144,7 +2147,18 @@ typedef struct { /** True if the texture index or handle is not dynamically uniform */ bool texture_non_uniform; - /** True if the sampler index or handle is not dynamically uniform */ + /** True if the sampler index or handle is not dynamically uniform. + * + * This may be set when VK_EXT_descriptor_indexing is supported and the + * appropriate capability is enabled. + * + * This should always be false in GLSL (GLSL ES 3.20 says "When aggregated + * into arrays within a shader, opaque types can only be indexed with a + * dynamically uniform integral expression", and GLSL 4.60 says "When + * aggregated into arrays within a shader, [texture, sampler, and + * samplerShadow] types can only be indexed with a dynamically uniform + * expression, or texture lookup will result in undefined values."). + */ bool sampler_non_uniform; /** The texture index diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index f5b3893..1cac0db 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -540,9 +540,15 @@ intrinsic("rt_trace_ray", src_comp=[-1, 1, 1, 1, 1, 1, 3, 1, 3, 1, -1], # Atomic counters # -# The *_var variants take an atomic_uint nir_variable, while the other, -# lowered, variants take a constant buffer index and register offset. - +# The *_deref variants take an atomic_uint nir_variable, while the other, +# lowered, variants take a buffer index and register offset. The buffer index +# is always constant, as there's no way to declare an array of atomic counter +# buffers. +# +# The register offset may be non-constant but must by dynamically uniform +# ("Atomic counters aggregated into arrays within a shader can only be indexed +# with dynamically uniform integral expressions, otherwise results are +# undefined.") def atomic(name, flags=[]): intrinsic(name + "_deref", src_comp=[-1], dest_comp=1, flags=flags) intrinsic(name, src_comp=[1], dest_comp=1, indices=[BASE], flags=flags) @@ -576,7 +582,9 @@ atomic3("atomic_counter_comp_swap") # In the first version, the image variable contains the memory and layout # qualifiers that influence the semantics of the intrinsic. In the second and # third, the image format and access qualifiers are provided as constant -# indices. +# indices. Up through GLSL ES 3.10, the image index source may only be a +# constant array access. GLSL ES 3.20 and GLSL 4.00 allow dynamically uniform +# indexing. # # All image intrinsics take a four-coordinate vector and a sample index as # 2nd and 3rd sources, determining the location within the image that will be @@ -661,7 +669,8 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0, # All SSBO operations take 3 sources except CompSwap that takes 4. These # sources represent: # -# 0: The SSBO buffer index. +# 0: The SSBO buffer index (dynamically uniform in GLSL, possibly non-uniform +# with VK_EXT_descriptor_indexing). # 1: The offset into the SSBO buffer of the variable that the atomic # operation will operate on. # 2: The data parameter to the atomic function (i.e. the value to add diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 098f370..270bd77 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -944,7 +944,32 @@ enum gl_access_qualifier /* The memory used by the access/variable is not written. */ ACCESS_NON_WRITEABLE = (1 << 4), - /** The access may use a non-uniform buffer or image index */ + /** + * The access may use a non-uniform buffer or image index. + * + * This is not allowed in either OpenGL or OpenGL ES, or Vulkan unless + * VK_EXT_descriptor_indexing is supported and the appropriate capability is + * enabled. + * + * Some GL spec archaeology justifying this: + * + * Up through at least GLSL ES 3.20 and GLSL 4.50, "Opaque Types" says "When + * aggregated into arrays within a shader, opaque types can only be indexed + * with a dynamically uniform integral expression (see section 3.9.3) unless + * otherwise noted; otherwise, results are undefined." + * + * The original GL_AB_shader_image_load_store specification for desktop GL + * didn't have this restriction ("Images may be aggregated into arrays within + * a shader (using square brackets [ ]) and can be indexed with general + * integer expressions.") At the same time, + * GL_ARB_shader_storage_buffer_objects *did* have the uniform restriction + * ("A uniform or shader storage block array can only be indexed with a + * dynamically uniform integral expression, otherwise results are + * undefined"), just like ARB_gpu_shader5 did when it first introduced a + * non-constant indexing of an opaque type with samplers. So, we assume that + * this was an oversight in the original image_load_store spec, and was + * considered a correction in the merge to core. + */ ACCESS_NON_UNIFORM = (1 << 5), /* This has the same semantics as NIR_INTRINSIC_CAN_REORDER, only to be -- 2.7.4