anv: reduce push constant size for descriptor sets
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 15 Mar 2023 14:10:25 +0000 (16:10 +0200)
committerMarge Bot <emma+marge@anholt.net>
Tue, 30 May 2023 06:36:38 +0000 (06:36 +0000)
Now that descriptor sets are located in a 1GB area, we can avoid
storing the whole address of each descriptor set and instead add the
base address of the area to a 32-bit offset.

Replayed a bunch of fossils with this change; the differences are not
really significant one way or another:

Totals:
Instrs: 9278246 -> 9277148 (-0.01%); split: -0.01%, +0.00%
Cycles: 3547598421 -> 3547579435 (-0.00%); split: -0.00%, +0.00%

Totals from 353 (1.14% of 31021) affected shaders:
Instrs: 581546 -> 580448 (-0.19%); split: -0.23%, +0.04%
Cycles: 25885422 -> 25866436 (-0.07%); split: -0.31%, +0.24%

No difference on send messages or spills/fills.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>

src/intel/compiler/brw_compiler.h
src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_nir_compute_push_layout.c
src/intel/vulkan/anv_pipeline_cache.c
src/intel/vulkan/anv_private.h

index 77f23cd..45452ba 100644 (file)
@@ -737,6 +737,7 @@ enum brw_shader_reloc_id {
    BRW_SHADER_RELOC_SHADER_START_OFFSET,
    BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
    BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
+   BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
 };
 
 enum brw_shader_reloc_type {
index 8ca61fe..4025be1 100644 (file)
@@ -455,10 +455,10 @@ void anv_CmdBindPipeline(
 
             assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
             if (layout->set[s].layout->dynamic_offset_count > 0 &&
-                (push->desc_sets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
-               push->desc_sets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
-               push->desc_sets[s] |= (layout->set[s].dynamic_offset_start &
-                                      ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
+                (push->desc_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
+               push->desc_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
+               push->desc_offsets[s] |= (layout->set[s].dynamic_offset_start &
+                                         ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
                modified = true;
             }
          }
@@ -586,15 +586,17 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
       if (update_desc_sets) {
          struct anv_push_constants *push = &pipe_state->push_constants;
 
-         struct anv_address addr = anv_descriptor_set_address(set);
-         push->desc_sets[set_index] &= ~ANV_DESCRIPTOR_SET_ADDRESS_MASK;
-         push->desc_sets[set_index] |= (anv_address_physical(addr) &
-                                        ANV_DESCRIPTOR_SET_ADDRESS_MASK);
+         struct anv_address set_addr = anv_descriptor_set_address(set);
+         uint64_t addr = anv_address_physical(set_addr);
+         uint32_t offset = addr & 0xffffffff;
+         assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
+         push->desc_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
+         push->desc_offsets[set_index] |= offset;
 
-         if (addr.bo) {
+         if (set_addr.bo) {
             anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
                                   cmd_buffer->batch.alloc,
-                                  addr.bo);
+                                  set_addr.bo);
          }
       }
 
index ef03750..1380c91 100644 (file)
@@ -1017,10 +1017,11 @@ VkResult anv_CreateDescriptorPool(
          }
       } else {
          VkResult result = anv_device_alloc_bo(device,
-                                               "descriptors",
+                                               "indirect descriptors",
                                                descriptor_bo_size,
                                                ANV_BO_ALLOC_MAPPED |
-                                               ANV_BO_ALLOC_SNOOPED,
+                                               ANV_BO_ALLOC_SNOOPED |
+                                               ANV_BO_ALLOC_DESCRIPTOR_POOL,
                                                0 /* explicit_address */,
                                                &pool->bo);
          if (result != VK_SUCCESS) {
index cea03ca..a7c9e72 100644 (file)
@@ -70,10 +70,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
 
             case nir_intrinsic_load_desc_set_address_intel:
             case nir_intrinsic_load_desc_set_dynamic_index_intel: {
-               unsigned base = offsetof(struct anv_push_constants, desc_sets);
+               unsigned base = offsetof(struct anv_push_constants, desc_offsets);
                push_start = MIN2(push_start, base);
                push_end = MAX2(push_end, base +
-                  sizeof_field(struct anv_push_constants, desc_sets));
+                  sizeof_field(struct anv_push_constants, desc_offsets));
                break;
             }
 
@@ -177,27 +177,30 @@ anv_nir_compute_push_layout(nir_shader *nir,
 
                case nir_intrinsic_load_desc_set_address_intel: {
                   b->cursor = nir_before_instr(&intrin->instr);
-                  nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
-                     nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
-                     .base = offsetof(struct anv_push_constants, desc_sets),
-                     .range = sizeof_field(struct anv_push_constants, desc_sets),
-                     .dest_type = nir_type_uint64);
-                  pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_ADDRESS_MASK);
-                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
+                  nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
+                     nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
+                     .base = offsetof(struct anv_push_constants, desc_offsets),
+                     .range = sizeof_field(struct anv_push_constants, desc_offsets),
+                     .dest_type = nir_type_uint32);
+                  pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
+                  nir_ssa_def *desc_addr =
+                     nir_pack_64_2x32_split(
+                        b, pc_load,
+                        nir_load_reloc_const_intel(
+                           b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
+                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_addr);
                   break;
                }
 
                case nir_intrinsic_load_desc_set_dynamic_index_intel: {
                   b->cursor = nir_before_instr(&intrin->instr);
-                  nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
-                     nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
-                     .base = offsetof(struct anv_push_constants, desc_sets),
-                     .range = sizeof_field(struct anv_push_constants, desc_sets),
-                     .dest_type = nir_type_uint64);
-                  pc_load = nir_i2i32(
-                     b,
-                     nir_iand_imm(
-                        b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK));
+                  nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
+                     nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
+                     .base = offsetof(struct anv_push_constants, desc_offsets),
+                     .range = sizeof_field(struct anv_push_constants, desc_offsets),
+                     .dest_type = nir_type_uint32);
+                  pc_load = nir_iand_imm(
+                     b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
                   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
                   break;
                }
index 1c492d2..4434248 100644 (file)
@@ -122,7 +122,14 @@ anv_shader_bin_create(struct anv_device *device,
       prog_data_in->const_data_offset;
 
    int rv_count = 0;
-   struct brw_shader_reloc_value reloc_values[5];
+   struct brw_shader_reloc_value reloc_values[6];
+   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
+   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
+      .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
+      .value = device->physical->indirect_descriptors ?
+               (device->physical->va.descriptor_pool.addr >> 32) :
+               (device->physical->va.binding_table_pool.addr >> 32),
+   };
    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
       .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
       .value = shader_data_addr,
index 9f4671b..e9597b9 100644 (file)
@@ -2444,15 +2444,18 @@ struct anv_push_constants {
    /** Ray query globals (RT_DISPATCH_GLOBALS) */
    uint64_t ray_query_globals;
 
-#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint64_t)ANV_UBO_ALIGNMENT - 1)
-#define ANV_DESCRIPTOR_SET_ADDRESS_MASK       (~(uint64_t)(ANV_UBO_ALIGNMENT - 1))
+#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
+#define ANV_DESCRIPTOR_SET_OFFSET_MASK        (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
 
    /**
+    * Base offsets for descriptor sets from
+    * INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
+    *
     * In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
     *
     * In bits [6:63] : descriptor set address
     */
-   uint64_t desc_sets[MAX_SETS];
+   uint32_t desc_offsets[MAX_SETS];
 
    union {
       struct {