anv: add an option for using indirect descriptors
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Thu, 23 Feb 2023 12:18:51 +0000 (14:18 +0200)
committer Marge Bot <emma+marge@anholt.net>
Tue, 30 May 2023 06:36:37 +0000 (06:36 +0000)
This is the default for now. It needs to be part of the pipeline hashing
as we will allow this to be tweaked per application.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>

src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/util/driconf.h

index 1c07c45..73ba463 100644 (file)
@@ -77,6 +77,7 @@ static const driOptionDescription anv_dri_options[] = {
       DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
       DRI_CONF_NO_16BIT(false)
       DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6)
+      DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(false)
    DRI_CONF_SECTION_END
 
    DRI_CONF_SECTION_DEBUG
@@ -1325,6 +1326,13 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
 
    device->uses_ex_bso = device->info.verx10 >= 125;
 
+   /* For now always use indirect descriptors. We'll update this
+    * to !uses_ex_bso when all the infrastructure is built up.
+    */
+   device->indirect_descriptors =
+      true ||
+      driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
+
    /* Check if we can read the GPU timestamp register from the CPU */
    uint64_t u64_ignore;
    device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
index 4e3a2c2..14887aa 100644 (file)
@@ -698,23 +698,33 @@ anv_graphics_pipeline_stage_fragment_dynamic(const struct anv_pipeline_stage *st
 }
 
 static void
+anv_pipeline_hash_common(struct mesa_sha1 *ctx,
+                         const struct anv_pipeline *pipeline)
+{
+   struct anv_device *device = pipeline->device;
+
+   _mesa_sha1_update(ctx, pipeline->layout.sha1, sizeof(pipeline->layout.sha1));
+
+   const bool indirect_descriptors = device->physical->indirect_descriptors;
+   _mesa_sha1_update(ctx, &indirect_descriptors, sizeof(indirect_descriptors));
+
+   const bool rba = device->robust_buffer_access;
+   _mesa_sha1_update(ctx, &rba, sizeof(rba));
+}
+
+static void
 anv_pipeline_hash_graphics(struct anv_graphics_base_pipeline *pipeline,
                            struct anv_pipeline_stage *stages,
                            uint32_t view_mask,
                            unsigned char *sha1_out)
 {
+   const struct anv_device *device = pipeline->base.device;
    struct mesa_sha1 ctx;
    _mesa_sha1_init(&ctx);
 
-   _mesa_sha1_update(&ctx, &view_mask, sizeof(view_mask));
-
-   _mesa_sha1_update(&ctx, pipeline->base.layout.sha1,
-                     sizeof(pipeline->base.layout.sha1));
-
-   const struct anv_device *device = pipeline->base.device;
+   anv_pipeline_hash_common(&ctx, &pipeline->base);
 
-   const bool rba = device->robust_buffer_access;
-   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
+   _mesa_sha1_update(&ctx, &view_mask, sizeof(view_mask));
 
    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
       if (pipeline->base.active_stages & BITFIELD_BIT(s)) {
@@ -737,16 +747,11 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
                           struct anv_pipeline_stage *stage,
                           unsigned char *sha1_out)
 {
+   const struct anv_device *device = pipeline->base.device;
    struct mesa_sha1 ctx;
    _mesa_sha1_init(&ctx);
 
-   _mesa_sha1_update(&ctx, pipeline->base.layout.sha1,
-                     sizeof(pipeline->base.layout.sha1));
-
-   const struct anv_device *device = pipeline->base.device;
-
-   const bool rba = device->robust_buffer_access;
-   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
+   anv_pipeline_hash_common(&ctx, &pipeline->base);
 
    const bool afs = device->physical->instance->assume_full_subgroups;
    _mesa_sha1_update(&ctx, &afs, sizeof(afs));
@@ -766,11 +771,7 @@ anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
    struct mesa_sha1 ctx;
    _mesa_sha1_init(&ctx);
 
-   _mesa_sha1_update(&ctx, pipeline->base.layout.sha1,
-                     sizeof(pipeline->base.layout.sha1));
-
-   const bool rba = pipeline->base.device->robust_buffer_access;
-   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
+   anv_pipeline_hash_common(&ctx, &pipeline->base);
 
    _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
    _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
index 824b16b..9d294e8 100644 (file)
@@ -912,6 +912,26 @@ struct anv_physical_device {
      */
     bool                                        generated_indirect_draws;
 
+    /**
+     * True if the descriptor buffers hold one of the following:
+     *    - anv_sampled_image_descriptor
+     *    - anv_storage_image_descriptor
+     *    - anv_address_range_descriptor
+     *
+     * Accessing the descriptors in a bindless fashion from the shader
+     * requires an indirection in the shader: first fetch one of the
+     * structures listed above from the descriptor buffer, then emit the send
+     * message to the fixed function (sampler, dataport, etc...) with the
+     * handle fetched above.
+     *
+     * We need to do things this way prior to DG2 because the bindless surface
+     * state space is limited to 64MB and some applications will allocate more
+     * than what the HW can support. On DG2+ we get 4GB of bindless surface
+     * state and so we can reference directly RENDER_SURFACE_STATE/SAMPLER_STATE
+     * structures instead.
+     */
+    bool                                        indirect_descriptors;
+
     struct {
       uint32_t                                  family_count;
       struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
index d1a000e..ac51622 100644 (file)
    DRI_CONF_OPT_I(query_clear_with_blorp_threshold, def, 0, INT32_MAX, \
                   "Indirect threshold count above which we start generating commands")
 
+#define DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(def) \
+   DRI_CONF_OPT_B(force_indirect_descriptors, def, \
+                  "Use an indirection to access buffer/image/texture/sampler handles")
+
 /**
  * \brief DZN specific configuration options
  */