From 257bf9b6c327058051a27e3b55d4d87106a695d7 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 14 Oct 2022 17:49:58 +0300 Subject: [PATCH] anv: toggle extended bindless surface state on Gfx12.5+ We bump the max surfaces to ~16 million instead of ~1 million on Gfx9-12. We could do more but that'll come later. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/intel/vulkan/anv_descriptor_set.c | 30 ++++++++++++++++++------------ src/intel/vulkan/anv_device.c | 6 +++++- src/intel/vulkan/anv_private.h | 16 ++++++++++++++++ src/intel/vulkan/genX_cmd_buffer.c | 7 +++++-- src/intel/vulkan/genX_state.c | 6 +++--- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index c12dd0c..bf4f0e7 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -36,11 +36,6 @@ * Descriptor set layouts. */ -/* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64 - * and we can't put anything else there we use 64b. - */ -#define ANV_SURFACE_STATE_SIZE (64) - static enum anv_descriptor_data anv_descriptor_data_for_type(const struct anv_physical_device *device, VkDescriptorType type) @@ -1418,15 +1413,21 @@ VkResult anv_FreeDescriptorSets( } static uint32_t -anv_surface_state_to_handle(struct anv_state state) +anv_surface_state_to_handle(struct anv_physical_device *device, + struct anv_state state) { /* Bits 31:12 of the bindless surface offset in the extended message * descriptor is bits 25:6 of the byte-based address. 
*/ assert(state.offset >= 0); uint32_t offset = state.offset; - assert((offset & 0x3f) == 0 && offset < (1 << 26)); - return offset << 6; + if (device->uses_ex_bso) { + assert((offset & 0x3f) == 0); + return offset; + } else { + assert((offset & 0x3f) == 0 && offset < (1 << 26)); + return offset << 6; + } } void @@ -1505,7 +1506,8 @@ anv_descriptor_set_write_image_view(struct anv_device *device, (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ? image_view->planes[p].general_sampler_surface_state : image_view->planes[p].optimal_sampler_surface_state; - desc_data[p].image = anv_surface_state_to_handle(sstate.state); + desc_data[p].image = + anv_surface_state_to_handle(device->physical, sstate.state); } } @@ -1529,7 +1531,8 @@ anv_descriptor_set_write_image_view(struct anv_device *device, assert(image_view->n_planes == 1); struct anv_storage_image_descriptor desc_data = { .vanilla = anv_surface_state_to_handle( - image_view->planes[0].storage_surface_state.state), + device->physical, + image_view->planes[0].storage_surface_state.state), }; memcpy(desc_map, &desc_data, sizeof(desc_data)); } @@ -1571,7 +1574,9 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device, if (data & ANV_DESCRIPTOR_SAMPLED_IMAGE) { struct anv_sampled_image_descriptor desc_data = { - .image = anv_surface_state_to_handle(buffer_view->surface_state), + .image = anv_surface_state_to_handle( + device->physical, + buffer_view->surface_state), }; memcpy(desc_map, &desc_data, sizeof(desc_data)); } @@ -1579,7 +1584,8 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device, if (data & ANV_DESCRIPTOR_STORAGE_IMAGE) { struct anv_storage_image_descriptor desc_data = { .vanilla = anv_surface_state_to_handle( - buffer_view->storage_surface_state), + device->physical, + buffer_view->storage_surface_state), }; memcpy(desc_map, &desc_data, sizeof(desc_data)); } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 5fd736f..1c07c45 100644 --- 
a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1323,6 +1323,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false); + device->uses_ex_bso = device->info.verx10 >= 125; + /* Check if we can read the GPU timestamp register from the CPU */ uint64_t u64_ignore; device->has_reg_timestamp = intel_gem_read_render_timestamp(fd, @@ -1342,6 +1344,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->compiler->constant_buffer_0_is_relative = false; device->compiler->supports_shader_constants = true; device->compiler->indirect_ubos_use_sampler = device->info.ver < 12; + device->compiler->extended_bindless_surface_offset = device->uses_ex_bso; isl_device_init(&device->isl_dev, &device->info); @@ -1867,7 +1870,8 @@ anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice, * twice a bunch of times (or a bunch of null descriptors), we can safely * advertise a larger limit here. */ - const unsigned max_bindless_views = 1 << 20; + const unsigned max_bindless_views = + anv_physical_device_bindless_heap_size(pdevice) / ANV_SURFACE_STATE_SIZE; p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views; p->shaderUniformBufferArrayNonUniformIndexingNative = false; p->shaderSampledImageArrayNonUniformIndexingNative = false; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 783def6..824b16b 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -181,6 +181,11 @@ struct intel_perf_query_result; #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16 #define MAX_SAMPLE_LOCATIONS 16 +/* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64 + * and we can't put anything else there we use 64b. + */ +#define ANV_SURFACE_STATE_SIZE (64) + /* From the Skylake PRM Vol. 
7 "Binding Table Surface State Model": * * "The surface state model is used when a Binding Table Index (specified @@ -893,6 +898,9 @@ struct anv_physical_device { /** True if we can create protected contexts. */ bool has_protected_contexts; + /** True if the device uses the extended bindless surface offset (Gfx12.5+). */ + bool uses_ex_bso; + bool always_flush_cache; /** @@ -967,6 +975,14 @@ struct anv_physical_device { struct intel_measure_device measure_device; }; +static inline uint32_t +anv_physical_device_bindless_heap_size(const struct anv_physical_device *device) +{ + return device->uses_ex_bso ? + 128 * 1024 * 1024 /* 128 MiB */ : + 64 * 1024 * 1024 /* 64 MiB */; +} + static inline bool anv_physical_device_has_vram(const struct anv_physical_device *device) { diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 85f2c13..f550053 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -184,8 +184,11 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.DynamicStateBufferSizeModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { device->bindless_surface_state_pool.block_pool.bo, 0 }; - sba.BindlessSurfaceStateSize = (1 << 20) - 1; + (struct anv_address) { .offset = + device->physical->va.bindless_surface_state_pool.addr, + }; + sba.BindlessSurfaceStateSize = + anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true; #if GFX_VER >= 11 diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 1fa0fc2..582553b 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -263,9 +263,9 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { .offset = - device->physical->va.bindless_surface_state_pool.addr, - }; - 
sba.BindlessSurfaceStateSize = (1 << 20) - 1; + device->physical->va.bindless_surface_state_pool.addr, }; + sba.BindlessSurfaceStateSize = + anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true; -- 2.7.4