From: Lionel Landwerlin Date: Wed, 15 Mar 2023 14:10:25 +0000 (+0200) Subject: anv: reduce push constant size for descriptor sets X-Git-Tag: upstream/23.3.3~7931 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3f1ff326e00ab67af455a37170248658f0a44ea5;p=platform%2Fupstream%2Fmesa.git anv: reduce push constant size for descriptor sets Now that descriptor sets are located in a 1GB area, we can avoid storing the whole address of each descriptor set and instead add the base address of the area to a 32-bit offset. Replayed a bunch of fossils with this; the changes are not really significant one way or another: Totals: Instrs: 9278246 -> 9277148 (-0.01%); split: -0.01%, +0.00% Cycles: 3547598421 -> 3547579435 (-0.00%); split: -0.00%, +0.00% Totals from 353 (1.14% of 31021) affected shaders: Instrs: 581546 -> 580448 (-0.19%); split: -0.23%, +0.04% Cycles: 25885422 -> 25866436 (-0.07%); split: -0.31%, +0.24% No difference on send messages or spills/fills. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 77f23cd..45452ba 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -737,6 +737,7 @@ enum brw_shader_reloc_id { BRW_SHADER_RELOC_SHADER_START_OFFSET, BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH, + BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH, }; enum brw_shader_reloc_type { diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 8ca61fe..4025be1 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -455,10 +455,10 @@ void anv_CmdBindPipeline( assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS); if (layout->set[s].layout->dynamic_offset_count > 0 && - (push->desc_sets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) { - push->desc_sets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK; - 
push->desc_sets[s] |= (layout->set[s].dynamic_offset_start & - ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK); + (push->desc_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) { + push->desc_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK; + push->desc_offsets[s] |= (layout->set[s].dynamic_offset_start & + ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK); modified = true; } } @@ -586,15 +586,17 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, if (update_desc_sets) { struct anv_push_constants *push = &pipe_state->push_constants; - struct anv_address addr = anv_descriptor_set_address(set); - push->desc_sets[set_index] &= ~ANV_DESCRIPTOR_SET_ADDRESS_MASK; - push->desc_sets[set_index] |= (anv_address_physical(addr) & - ANV_DESCRIPTOR_SET_ADDRESS_MASK); + struct anv_address set_addr = anv_descriptor_set_address(set); + uint64_t addr = anv_address_physical(set_addr); + uint32_t offset = addr & 0xffffffff; + assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0); + push->desc_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK; + push->desc_offsets[set_index] |= offset; - if (addr.bo) { + if (set_addr.bo) { anv_reloc_list_add_bo(cmd_buffer->batch.relocs, cmd_buffer->batch.alloc, - addr.bo); + set_addr.bo); } } diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index ef03750..1380c91 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -1017,10 +1017,11 @@ VkResult anv_CreateDescriptorPool( } } else { VkResult result = anv_device_alloc_bo(device, - "descriptors", + "indirect descriptors", descriptor_bo_size, ANV_BO_ALLOC_MAPPED | - ANV_BO_ALLOC_SNOOPED, + ANV_BO_ALLOC_SNOOPED | + ANV_BO_ALLOC_DESCRIPTOR_POOL, 0 /* explicit_address */, &pool->bo); if (result != VK_SUCCESS) { diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index cea03ca..a7c9e72 100644 --- 
a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -70,10 +70,10 @@ anv_nir_compute_push_layout(nir_shader *nir, case nir_intrinsic_load_desc_set_address_intel: case nir_intrinsic_load_desc_set_dynamic_index_intel: { - unsigned base = offsetof(struct anv_push_constants, desc_sets); + unsigned base = offsetof(struct anv_push_constants, desc_offsets); push_start = MIN2(push_start, base); push_end = MAX2(push_end, base + - sizeof_field(struct anv_push_constants, desc_sets)); + sizeof_field(struct anv_push_constants, desc_offsets)); break; } @@ -177,27 +177,30 @@ anv_nir_compute_push_layout(nir_shader *nir, case nir_intrinsic_load_desc_set_address_intel: { b->cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64, - nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)), - .base = offsetof(struct anv_push_constants, desc_sets), - .range = sizeof_field(struct anv_push_constants, desc_sets), - .dest_type = nir_type_uint64); - pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_ADDRESS_MASK); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load); + nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32, + nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)), + .base = offsetof(struct anv_push_constants, desc_offsets), + .range = sizeof_field(struct anv_push_constants, desc_offsets), + .dest_type = nir_type_uint32); + pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK); + nir_ssa_def *desc_addr = + nir_pack_64_2x32_split( + b, pc_load, + nir_load_reloc_const_intel( + b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH)); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_addr); break; } case nir_intrinsic_load_desc_set_dynamic_index_intel: { b->cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64, - nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)), - .base = offsetof(struct anv_push_constants, desc_sets), - .range = 
sizeof_field(struct anv_push_constants, desc_sets), - .dest_type = nir_type_uint64); - pc_load = nir_i2i32( - b, - nir_iand_imm( - b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK)); + nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32, + nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)), + .base = offsetof(struct anv_push_constants, desc_offsets), + .range = sizeof_field(struct anv_push_constants, desc_offsets), + .dest_type = nir_type_uint32); + pc_load = nir_iand_imm( + b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load); break; } diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 1c492d2..4434248 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -122,7 +122,14 @@ anv_shader_bin_create(struct anv_device *device, prog_data_in->const_data_offset; int rv_count = 0; - struct brw_shader_reloc_value reloc_values[5]; + struct brw_shader_reloc_value reloc_values[6]; + assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0); + reloc_values[rv_count++] = (struct brw_shader_reloc_value) { + .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH, + .value = device->physical->indirect_descriptors ? 
+ (device->physical->va.descriptor_pool.addr >> 32) : + (device->physical->va.binding_table_pool.addr >> 32), + }; reloc_values[rv_count++] = (struct brw_shader_reloc_value) { .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW, .value = shader_data_addr, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 9f4671b..e9597b9 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2444,15 +2444,18 @@ struct anv_push_constants { /** Ray query globals (RT_DISPATCH_GLOBALS) */ uint64_t ray_query_globals; -#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint64_t)ANV_UBO_ALIGNMENT - 1) -#define ANV_DESCRIPTOR_SET_ADDRESS_MASK (~(uint64_t)(ANV_UBO_ALIGNMENT - 1)) +#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1) +#define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1)) /** + * Base offsets for descriptor sets from + * INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS + * * In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set * * In bits [6:63] : descriptor set address */ - uint64_t desc_sets[MAX_SETS]; + uint32_t desc_offsets[MAX_SETS]; union { struct {