d3d12: Support dynamic UBO/SSBO indexing
author Jesse Natalie <jenatali@microsoft.com>
Mon, 17 Jan 2022 15:31:53 +0000 (07:31 -0800)
committer Marge Bot <emma+marge@anholt.net>
Fri, 21 Jan 2022 23:08:26 +0000 (23:08 +0000)
Reviewed-by: Sil Vilerino <sivileri@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14624>

src/gallium/drivers/d3d12/d3d12_compiler.cpp
src/gallium/drivers/d3d12/d3d12_root_signature.cpp
src/microsoft/clc/clc_compiler.c
src/microsoft/compiler/nir_to_dxil.c
src/microsoft/compiler/nir_to_dxil.h

index 134d268..0e6bd2a 100644 (file)
@@ -182,7 +182,8 @@ compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
    struct nir_to_dxil_options opts = {};
    opts.interpolate_at_vertex = screen->have_load_at_vertex;
    opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
-   opts.ubo_binding_offset = shader->has_default_ubo0 ? 0 : 1;
+   opts.no_ubo0 = !shader->has_default_ubo0;
+   opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
    opts.provoking_vertex = key->fs.provoking_vertex;
    opts.environment = DXIL_ENVIRONMENT_GL;
 
@@ -219,7 +220,7 @@ compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
    if(nir->info.num_ubos) {
       // Ignore state_vars ubo as it is bound as root constants
       unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
-      for(unsigned i = opts.ubo_binding_offset; i < num_ubo_bindings; ++i) {
+      for(unsigned i = shader->has_default_ubo0 ? 0 : 1; i < num_ubo_bindings; ++i) {
          shader->cb_bindings[shader->num_cb_bindings++].binding = i;
       }
    }
index d5fb3bf..07b35ee 100644 (file)
@@ -72,13 +72,12 @@ init_constant_root_param(D3D12_ROOT_PARAMETER1 *param,
 }
 
 static inline void
-init_range_root_param(D3D12_ROOT_PARAMETER1 *param,
-                      D3D12_DESCRIPTOR_RANGE1 *range,
-                      D3D12_DESCRIPTOR_RANGE_TYPE type,
-                      uint32_t num_descs,
-                      D3D12_SHADER_VISIBILITY visibility,
-                      uint32_t base_shader_register,
-                      uint32_t register_space)
+init_range(D3D12_DESCRIPTOR_RANGE1 *range,
+           D3D12_DESCRIPTOR_RANGE_TYPE type,
+           uint32_t num_descs,
+           uint32_t base_shader_register,
+           uint32_t register_space,
+           uint32_t offset_from_start)
 {
    range->RangeType = type;
    range->NumDescriptors = num_descs;
@@ -89,8 +88,19 @@ init_range_root_param(D3D12_ROOT_PARAMETER1 *param,
       range->Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
    else
       range->Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS;
-   range->OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+   range->OffsetInDescriptorsFromTableStart = offset_from_start;
+}
 
+static inline void
+init_range_root_param(D3D12_ROOT_PARAMETER1 *param,
+                      D3D12_DESCRIPTOR_RANGE1 *range,
+                      D3D12_DESCRIPTOR_RANGE_TYPE type,
+                      uint32_t num_descs,
+                      D3D12_SHADER_VISIBILITY visibility,
+                      uint32_t base_shader_register,
+                      uint32_t register_space)
+{
+   init_range(range, type, num_descs, base_shader_register, register_space, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND);
    param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    param->DescriptorTable.NumDescriptorRanges = 1;
    param->DescriptorTable.pDescriptorRanges = range;
@@ -102,7 +112,7 @@ create_root_signature(struct d3d12_context *ctx, struct d3d12_root_signature_key
 {
    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
    D3D12_ROOT_PARAMETER1 root_params[D3D12_GFX_SHADER_STAGES * D3D12_NUM_BINDING_TYPES];
-   D3D12_DESCRIPTOR_RANGE1 desc_ranges[D3D12_GFX_SHADER_STAGES * D3D12_NUM_BINDING_TYPES];
+   D3D12_DESCRIPTOR_RANGE1 desc_ranges[D3D12_GFX_SHADER_STAGES * (D3D12_NUM_BINDING_TYPES + 1)];
    unsigned num_params = 0;
    unsigned num_ranges = 0;
 
@@ -140,13 +150,26 @@ create_root_signature(struct d3d12_context *ctx, struct d3d12_root_signature_key
       }
 
       if (key->stages[i].num_ssbos > 0) {
-         init_range_root_param(&root_params[num_params++],
+         init_range_root_param(&root_params[num_params],
                                &desc_ranges[num_ranges++],
                                D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
                                key->stages[i].num_ssbos,
                                visibility,
                                0,
                                0);
+
+         /* To work around a WARP bug, bind these descriptors a second time in descriptor
+          * space 2. Space 0 will be used for static indexing, while space 2 will be used
+          * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
+          * space 2 will be a single array.
+          */
+         root_params[num_params++].DescriptorTable.NumDescriptorRanges++;
+         init_range(&desc_ranges[num_ranges++],
+                    D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
+                    key->stages[i].num_ssbos,
+                    0,
+                    2,
+                    0);
       }
 
       if (key->stages[i].num_images > 0) {
@@ -166,7 +189,7 @@ create_root_signature(struct d3d12_context *ctx, struct d3d12_root_signature_key
             visibility);
       }
       assert(num_params < PIPE_SHADER_TYPES * D3D12_NUM_BINDING_TYPES);
-      assert(num_ranges < PIPE_SHADER_TYPES * D3D12_NUM_BINDING_TYPES);
+      assert(num_ranges < PIPE_SHADER_TYPES * (D3D12_NUM_BINDING_TYPES + 1));
    }
 
    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc;
index 46ae127..8190953 100644 (file)
@@ -1163,7 +1163,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
    struct nir_to_dxil_options opts = {
       .interpolate_at_vertex = false,
       .lower_int16 = (conf && (conf->lower_bit_size & 16) != 0),
-      .ubo_binding_offset = 0,
       .disable_math_refactoring = true,
       .num_kernel_globals = num_global_inputs,
       .environment = DXIL_ENVIRONMENT_CL,
index 5d97d1d..c8bb81c 100644 (file)
@@ -1015,7 +1015,7 @@ emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned cou
        ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
       ctx->mod.feats.uavs_at_every_stage = true;
 
-   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
+   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN && space <= 1) {
       for (unsigned i = 0; i < count; ++i) {
          const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_UAV,
                                                                               id, binding + i, false);
@@ -2620,13 +2620,28 @@ get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_cl
 
    unsigned space = 0;
    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
-       class == DXIL_RESOURCE_CLASS_UAV &&
-       kind != DXIL_RESOURCE_KIND_RAW_BUFFER) {
-      space = 1;
+       class == DXIL_RESOURCE_CLASS_UAV) {
+      if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
+         space = 2;
+      else
+         space = 1;
    }
 
-   /* TODO: Figure out how to find this */
+   /* The base binding here will almost always be zero. The only cases where we end
+    * up in this type of dynamic indexing are:
+    * 1. GL UBOs
+    * 2. GL SSBOs
+    * 3. CL SSBOs
+    * In all cases except GL UBOs, the resources are a single zero-based array.
+    * For GL UBOs, the base is 1, because uniforms use 0 and cannot be dynamically
+    * indexed. All other cases should either fall into static indexing (first early return),
+    * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
+    * load_vulkan_descriptor handle creation.
+    */
    unsigned base_binding = 0;
+   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
+       class == DXIL_RESOURCE_CLASS_CBV)
+      base_binding = 1;
 
    const struct dxil_value *handle = emit_createhandle_call(ctx, class, 
       get_resource_id(ctx, class, space, base_binding), value, !const_block_index);
@@ -4754,10 +4769,21 @@ emit_cbvs(struct ntd_context *ctx)
             return false;
       }
    } else {
-      for (int i = ctx->opts->ubo_binding_offset; i < ctx->shader->info.num_ubos; ++i) {
-         char name[64];
-         snprintf(name, sizeof(name), "__ubo%d", i);
-         if (!emit_cbv(ctx, i, 0, 16384 /*4096 vec4's*/, 1, name))
+      if (ctx->shader->info.num_ubos) {
+         const unsigned ubo_size = 16384 /*4096 vec4's*/;
+         bool has_ubo0 = !ctx->opts->no_ubo0;
+         bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
+         unsigned ubo1_array_size = ctx->shader->info.num_ubos -
+            (has_state_vars ? 2 : 1);
+
+         if (has_ubo0 &&
+             !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
+            return false;
+         if (ubo1_array_size &&
+             !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos"))
+            return false;
+         if (has_state_vars &&
+             !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
             return false;
       }
    }
@@ -4928,6 +4954,15 @@ emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
                        DXIL_RESOURCE_KIND_RAW_BUFFER, name))
             return false;
       }
+      /* To work around a WARP bug, bind these descriptors a second time in descriptor
+       * space 2. Space 0 will be used for static indexing, while space 2 will be used
+       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
+       * space 2 will be a single array.
+       */
+      if (ctx->shader->info.num_ssbos &&
+          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID,
+                    DXIL_RESOURCE_KIND_RAW_BUFFER, "__ssbo_dynamic"))
+         return false;
    }
 
    nir_foreach_image_variable(var, ctx->shader) {
index e1df173..1275b1c 100644 (file)
@@ -84,7 +84,8 @@ struct nir_to_dxil_options {
    bool interpolate_at_vertex;
    bool lower_int16;
    bool disable_math_refactoring;
-   unsigned ubo_binding_offset;
+   bool no_ubo0;
+   bool last_ubo_is_not_arrayed;
    unsigned provoking_vertex;
    unsigned num_kernel_globals;
    enum dxil_environment environment;