From c60e94d61ff026a8ac025e109e0debfdea5b290e Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 23 Feb 2023 09:59:45 +0200 Subject: [PATCH] anv: make internal address space allocation more dynamic We're about to manipulate these pools and dealing with the fixed address ranges is painful. Signed-off-by: Lionel Landwerlin Reviewed-by: Ivan Briano Part-of: --- docs/envvars.rst | 2 + src/intel/dev/intel_debug.c | 1 + src/intel/dev/intel_debug.h | 1 + src/intel/vulkan/anv_device.c | 61 +++++---- src/intel/vulkan/anv_pipeline_cache.c | 18 +-- src/intel/vulkan/anv_private.h | 89 ++++--------- src/intel/vulkan/anv_va.c | 138 +++++++++++++++++++++ src/intel/vulkan/genX_cmd_buffer.c | 20 ++- .../vulkan/genX_cmd_draw_generated_indirect.h | 13 +- src/intel/vulkan/genX_state.c | 20 ++- src/intel/vulkan/meson.build | 1 + 11 files changed, 250 insertions(+), 114 deletions(-) create mode 100644 src/intel/vulkan/anv_va.c diff --git a/docs/envvars.rst b/docs/envvars.rst index 2ee7ce1..94f24e9 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -486,6 +486,8 @@ Intel driver environment variables dump shader assembly for fragment shaders ``gs`` dump shader assembly for geometry shaders + ``heaps`` + print information about the driver's heaps (Anv only) ``hex`` print instruction hex dump with the disassembly ``l3`` diff --git a/src/intel/dev/intel_debug.c b/src/intel/dev/intel_debug.c index 709f93c..95bb1a7 100644 --- a/src/intel/dev/intel_debug.c +++ b/src/intel/dev/intel_debug.c @@ -101,6 +101,7 @@ static const struct debug_control debug_control[] = { { "capture-all", DEBUG_CAPTURE_ALL }, { "perf-symbol-names", DEBUG_PERF_SYMBOL_NAMES }, { "swsb-stall", DEBUG_SWSB_STALL }, + { "heaps", DEBUG_HEAPS }, { NULL, 0 } }; diff --git a/src/intel/dev/intel_debug.h b/src/intel/dev/intel_debug.h index 64b38b5..03a423d 100644 --- a/src/intel/dev/intel_debug.h +++ b/src/intel/dev/intel_debug.h @@ -91,6 +91,7 @@ extern uint64_t intel_debug; #define DEBUG_CAPTURE_ALL (1ull << 43) 
#define DEBUG_PERF_SYMBOL_NAMES (1ull << 44) #define DEBUG_SWSB_STALL (1ull << 45) +#define DEBUG_HEAPS (1ull << 46) #define DEBUG_ANY (~0ull) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index d4636ab..653714d 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1414,6 +1414,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, if (result != VK_SUCCESS) goto fail_compiler; + anv_physical_device_init_va_ranges(device); + anv_physical_device_init_disk_cache(device); if (instance->vk.enabled_extensions.KHR_display) { @@ -3064,9 +3066,9 @@ VkResult anv_CreateDevice( decode_get_bo, NULL, device); decoder->engine = physical_device->queue.families[i].engine_class; - decoder->dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS; - decoder->surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS; - decoder->instruction_base = INSTRUCTION_STATE_POOL_MIN_ADDRESS; + decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr; + decoder->surface_base = physical_device->va.internal_surface_state_pool.addr; + decoder->instruction_base = physical_device->va.instruction_state_pool.addr; } } @@ -3142,18 +3144,16 @@ VkResult anv_CreateDevice( /* keep the page with address zero out of the allocator */ util_vma_heap_init(&device->vma_lo, - LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE); + device->physical->va.low_heap.addr, + device->physical->va.low_heap.size); - util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS, - CLIENT_VISIBLE_HEAP_SIZE); + util_vma_heap_init(&device->vma_cva, + device->physical->va.client_visible_heap.addr, + device->physical->va.client_visible_heap.size); - /* Leave the last 4GiB out of the high vma range, so that no state - * base address + size can overflow 48 bits. 
For more information see - * the comment about Wa32bitGeneralStateOffset in anv_allocator.c - */ - util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS, - physical_device->gtt_size - (1ull << 32) - - HIGH_HEAP_MIN_ADDRESS); + util_vma_heap_init(&device->vma_hi, + device->physical->va.high_heap.addr, + device->physical->va.high_heap.size); list_inithead(&device->memory_objects); @@ -3191,13 +3191,13 @@ VkResult anv_CreateDevice( */ result = anv_state_pool_init(&device->general_state_pool, device, "general pool", - 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384); + 0, device->physical->va.general_state_pool.addr, 16384); if (result != VK_SUCCESS) goto fail_batch_bo_pool; result = anv_state_pool_init(&device->dynamic_state_pool, device, "dynamic pool", - DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384); + device->physical->va.dynamic_state_pool.addr, 0, 16384); if (result != VK_SUCCESS) goto fail_general_state_pool; @@ -3214,7 +3214,8 @@ VkResult anv_CreateDevice( result = anv_state_pool_init(&device->instruction_state_pool, device, "instruction pool", - INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384); + device->physical->va.instruction_state_pool.addr, + 0, 16384); if (result != VK_SUCCESS) goto fail_dynamic_state_pool; @@ -3224,25 +3225,29 @@ VkResult anv_CreateDevice( */ result = anv_state_pool_init(&device->scratch_surface_state_pool, device, "scratch surface state pool", - SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096); + device->physical->va.scratch_surface_state_pool.addr, + 0, 4096); if (result != VK_SUCCESS) goto fail_instruction_state_pool; result = anv_state_pool_init(&device->internal_surface_state_pool, device, "internal surface state pool", - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, - SCRATCH_SURFACE_STATE_POOL_SIZE, 4096); + device->physical->va.internal_surface_state_pool.addr, + device->physical->va.scratch_surface_state_pool.size, + 4096); } else { result = anv_state_pool_init(&device->internal_surface_state_pool, device, "internal surface state pool", - 
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096); + device->physical->va.internal_surface_state_pool.addr, + 0, 4096); } if (result != VK_SUCCESS) goto fail_scratch_surface_state_pool; result = anv_state_pool_init(&device->bindless_surface_state_pool, device, "bindless surface state pool", - BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096); + device->physical->va.bindless_surface_state_pool.addr, + 0, 4096); if (result != VK_SUCCESS) goto fail_internal_surface_state_pool; @@ -3252,15 +3257,21 @@ VkResult anv_CreateDevice( */ result = anv_state_pool_init(&device->binding_table_pool, device, "binding table pool", - BINDING_TABLE_POOL_MIN_ADDRESS, 0, + device->physical->va.binding_table_pool.addr, 0, BINDING_TABLE_POOL_BLOCK_SIZE); } else { - int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS - - (int64_t)INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS; + /* The binding table should be in front of the surface states in virtual + * address space so that all surface states can be expressed as relative + * offsets from the binding table location. 
+ */ + assert(device->physical->va.binding_table_pool.addr < + device->physical->va.internal_surface_state_pool.addr); + int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr - + (int64_t)device->physical->va.internal_surface_state_pool.addr; assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0); result = anv_state_pool_init(&device->binding_table_pool, device, "binding table pool", - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, + device->physical->va.internal_surface_state_pool.addr, bt_pool_offset, BINDING_TABLE_POOL_BLOCK_SIZE); } diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 1ce5866..e27face 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -115,9 +115,10 @@ anv_shader_bin_create(struct anv_device *device, memcpy(shader->kernel.map, kernel_data, kernel_size); shader->kernel_size = kernel_size; - uint64_t shader_data_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS + - shader->kernel.offset + - prog_data_in->const_data_offset; + uint64_t shader_data_addr = + device->physical->va.instruction_state_pool.addr + + shader->kernel.offset + + prog_data_in->const_data_offset; int rv_count = 0; struct brw_shader_reloc_value reloc_values[5]; @@ -125,10 +126,10 @@ anv_shader_bin_create(struct anv_device *device, .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW, .value = shader_data_addr, }; - assert(shader_data_addr >> 32 == INSTRUCTION_STATE_POOL_MIN_ADDRESS >> 32); + assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32); reloc_values[rv_count++] = (struct brw_shader_reloc_value) { .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH, - .value = INSTRUCTION_STATE_POOL_MIN_ADDRESS >> 32 + .value = device->physical->va.instruction_state_pool.addr >> 32, }; reloc_values[rv_count++] = (struct brw_shader_reloc_value) { .id = BRW_SHADER_RELOC_SHADER_START_OFFSET, @@ -137,9 +138,10 @@ anv_shader_bin_create(struct anv_device *device, if 
(brw_shader_stage_is_bindless(stage)) { const struct brw_bs_prog_data *bs_prog_data = brw_bs_prog_data_const(prog_data_in); - uint64_t resume_sbt_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS + - shader->kernel.offset + - bs_prog_data->resume_sbt_offset; + uint64_t resume_sbt_addr = + device->physical->va.instruction_state_pool.addr + + shader->kernel.offset + + bs_prog_data->resume_sbt_offset; reloc_values[rv_count++] = (struct brw_shader_reloc_value) { .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW, .value = resume_sbt_addr, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ef22374..2caa994 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -132,69 +132,7 @@ struct intel_perf_query_result; #define NSEC_PER_SEC 1000000000ull -/* anv Virtual Memory Layout - * ========================= - * - * When the anv driver is determining the virtual graphics addresses of memory - * objects itself using the softpin mechanism, the following memory ranges - * will be used. - * - * Three special considerations to notice: - * - * (1) the dynamic state pool is located within the same 4 GiB as the low - * heap. This is to work around a VF cache issue described in a comment in - * anv_physical_device_init_heaps. - * - * (2) the binding table pool is located at lower addresses than the BT - * (binding table) surface state pool, within a 4 GiB range which also - * contains the bindless surface state pool. This allows surface state base - * addresses to cover both binding tables (16 bit offsets), the internal - * surface states (32 bit offsets) and the bindless surface states. - * - * (3) the last 4 GiB of the address space is withheld from the high - * heap. Various hardware units will read past the end of an object for - * various reasons. This healthy margin prevents reads from wrapping around - * 48-bit addresses. 
- */ -#define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000200000ULL /* 2 MiB */ -#define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL -#define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */ -#define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL -#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */ -#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL -#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */ -#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL -#define INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */ -#define INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL -#define SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB (8MiB overlaps surface state pool) */ -#define SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001407fffffULL -#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB (64MiB) */ -#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001c3ffffffULL -#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000200000000ULL /* 8 GiB */ -#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x00023fffffffULL -#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x000240000000ULL /* 9 GiB */ -#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x000a3fffffffULL -#define HIGH_HEAP_MIN_ADDRESS 0x000a40000000ULL /* 41 GiB */ - -#define GENERAL_STATE_POOL_SIZE \ - (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1) -#define LOW_HEAP_SIZE \ - (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) -#define DYNAMIC_STATE_POOL_SIZE \ - (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1) -#define BINDING_TABLE_POOL_SIZE \ - (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1) #define BINDING_TABLE_POOL_BLOCK_SIZE (65536) -#define SCRATCH_SURFACE_STATE_POOL_SIZE \ - (SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS - SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS + 1) -#define BINDLESS_SURFACE_STATE_POOL_SIZE \ - (BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS - 
BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS + 1) -#define INTERNAL_SURFACE_STATE_POOL_SIZE \ - (INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1) -#define INSTRUCTION_STATE_POOL_SIZE \ - (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1) -#define CLIENT_VISIBLE_HEAP_SIZE \ - (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1) /* Allowing different clear colors requires us to perform a depth resolve at * the end of certain render passes. This is because while slow clears store @@ -598,6 +536,12 @@ anv_address_map(struct anv_address addr) return addr.bo->map + addr.offset; } +/* Represent a virtual address range */ +struct anv_va_range { + uint64_t addr; + uint64_t size; +}; + /* Represents a lock-free linked list of "free" things. This is used by * both the block pool and the state pools. Unfortunately, in order to * solve the ABA problem, we can't use a single uint32_t head. @@ -985,6 +929,19 @@ struct anv_physical_device { #endif } memory; + struct { + struct anv_va_range general_state_pool; + struct anv_va_range low_heap; + struct anv_va_range dynamic_state_pool; + struct anv_va_range binding_table_pool; + struct anv_va_range internal_surface_state_pool; + struct anv_va_range scratch_surface_state_pool; + struct anv_va_range bindless_surface_state_pool; + struct anv_va_range instruction_state_pool; + struct anv_va_range client_visible_heap; + struct anv_va_range high_heap; + } va; + /* Either we have a single vram region and it's all mappable, or we have * both mappable & non-mappable parts. System memory is always available. 
*/ @@ -1276,10 +1233,11 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) } static inline struct anv_state -anv_bindless_state_for_binding_table(struct anv_state state) +anv_bindless_state_for_binding_table(struct anv_device *device, + struct anv_state state) { - state.offset += BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS - - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS; + state.offset += device->physical->va.bindless_surface_state_pool.addr - + device->physical->va.internal_surface_state_pool.addr; return state; } @@ -4401,6 +4359,7 @@ struct anv_performance_configuration_intel { uint64_t config_id; }; +void anv_physical_device_init_va_ranges(struct anv_physical_device *device); void anv_physical_device_init_perf(struct anv_physical_device *device, int fd); void anv_device_perf_init(struct anv_device *device); void anv_perf_write_pass_results(struct intel_perf_config *perf, diff --git a/src/intel/vulkan/anv_va.c b/src/intel/vulkan/anv_va.c new file mode 100644 index 0000000..017569d --- /dev/null +++ b/src/intel/vulkan/anv_va.c @@ -0,0 +1,138 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#include "util/u_math.h" + +static uint64_t +va_add(struct anv_va_range *range, uint64_t addr, uint64_t size) +{ + range->addr = addr; + range->size = size; + + return addr + size; +} + +static void +va_at(struct anv_va_range *range, uint64_t addr, uint64_t size) +{ + range->addr = addr; + range->size = size; +} + +static void +anv_device_print_vas(struct anv_physical_device *device) +{ + fprintf(stderr, "Driver heaps:\n"); +#define PRINT_HEAP(name) \ + fprintf(stderr, " 0x%016"PRIx64"-0x%016"PRIx64": %s\n", \ + device->va.name.addr, \ + device->va.name.addr + device->va.name.size, \ + #name); + PRINT_HEAP(general_state_pool); + PRINT_HEAP(low_heap); + PRINT_HEAP(dynamic_state_pool); + PRINT_HEAP(binding_table_pool); + PRINT_HEAP(internal_surface_state_pool); + PRINT_HEAP(bindless_surface_state_pool); + PRINT_HEAP(instruction_state_pool); + PRINT_HEAP(client_visible_heap); + PRINT_HEAP(high_heap); +} + +void +anv_physical_device_init_va_ranges(struct anv_physical_device *device) +{ + /* anv Virtual Memory Layout + * ========================= + * + * When the anv driver is determining the virtual graphics addresses of + * memory objects itself using the softpin mechanism, the following memory + * ranges will be used. + * + * Three special considerations to notice: + * + * (1) the dynamic state pool is located within the same 4 GiB as the low + * heap. This is to work around a VF cache issue described in a comment in + * anv_physical_device_init_heaps. + * + * (2) the binding table pool is located at lower addresses than the BT + * (binding table) surface state pool, within a 4 GiB range which also + * contains the bindless surface state pool. 
This allows surface state base + * addresses to cover both binding tables (16 bit offsets), the internal + * surface states (32 bit offsets) and the bindless surface states. + * + * (3) the last 4 GiB of the address space is withheld from the high heap. + * Various hardware units will read past the end of an object for various + * reasons. This healthy margin prevents reads from wrapping around 48-bit + * addresses. + */ + uint64_t _1Mb = 1ull * 1024 * 1024; + uint64_t _1Gb = 1ull * 1024 * 1024 * 1024; + uint64_t _4Gb = 4ull * 1024 * 1024 * 1024; + + uint64_t address = 0x000000200000ULL; /* 2MiB */ + + address = va_add(&device->va.general_state_pool, address, + _1Gb - address); + + address = va_add(&device->va.low_heap, address, _1Gb); + address = va_add(&device->va.dynamic_state_pool, address, _1Gb); + + /* The following addresses have to be located in a 4Gb range so that the + * binding tables can address internal surface states & bindless surface + * states. + */ + address = va_add(&device->va.binding_table_pool, address, _1Gb); + address = va_add(&device->va.internal_surface_state_pool, address, 2 * _1Gb); + /* Scratch surface state overlaps with the internal surface state */ + va_at(&device->va.scratch_surface_state_pool, + device->va.internal_surface_state_pool.addr, + 8 * _1Mb); + address = va_add(&device->va.bindless_surface_state_pool, address, _1Gb); + + /* We use a trick to compute constant data offsets in the shaders to avoid + * unnecessary 64bit address computations (see lower_load_constant() in + * anv_nir_apply_pipeline_layout.c). This assumes the instruction pool is + * located at an address with the lower 32bits at 0. + */ + address = align64(address, _4Gb); + address = va_add(&device->va.instruction_state_pool, address, _1Gb); + + /* Whatever we have left we split in 2 for app allocations client-visible & + * non-client-visible. + * + * Leave the last 4GiB out of the high vma range, so that no state + * base address + size can overflow 48 bits. 
For more information see + * the comment about Wa32bitGeneralStateOffset in anv_allocator.c + */ + uint64_t user_heaps_size = device->gtt_size - address - 4 * _1Gb; + uint64_t heaps_size_Gb = user_heaps_size / _1Gb / 2 ; + + address = va_add(&device->va.client_visible_heap, address, heaps_size_Gb * _1Gb); + address = va_add(&device->va.high_heap, address, heaps_size_Gb * _1Gb); + + if (INTEL_DEBUG(DEBUG_HEAPS)) + anv_device_print_vas(device); +} diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 9d3aee9..2499694 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -117,7 +117,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) { btpa.BindingTablePoolBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer); - btpa.BindingTablePoolBufferSize = BINDING_TABLE_POOL_BLOCK_SIZE / 4096; + btpa.BindingTablePoolBufferSize = device->physical->va.binding_table_pool.size / 4096; btpa.MOCS = mocs; } #else /* GFX_VERx10 < 125 */ @@ -177,8 +177,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.GeneralStateBufferSize = 0xfffff; sba.IndirectObjectBufferSize = 0xfffff; - sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096; - sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096; + sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096; + sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096; sba.GeneralStateBufferSizeModifyEnable = true; sba.IndirectObjectBufferSizeModifyEnable = true; sba.DynamicStateBufferSizeModifyEnable = true; @@ -2018,6 +2018,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (shader->push_desc_info.fully_promoted_ubo_descriptors & BITFIELD_BIT(desc_idx)) { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, 
cmd_buffer->device->null_surface_state); break; } @@ -2045,11 +2046,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, desc->image_view->planes[binding->plane].general_sampler_surface_state : desc->image_view->planes[binding->plane].optimal_sampler_surface_state; surface_state = - anv_bindless_state_for_binding_table(sstate.state); + anv_bindless_state_for_binding_table(cmd_buffer->device, sstate.state); assert(surface_state.alloc_size); } else { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, cmd_buffer->device->null_surface_state); } break; @@ -2059,10 +2061,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (desc->image_view) { struct anv_surface_state sstate = desc->image_view->planes[binding->plane].storage_surface_state; - surface_state = anv_bindless_state_for_binding_table(sstate.state); + surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, sstate.state); assert(surface_state.alloc_size); } else { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, cmd_buffer->device->null_surface_state); } break; @@ -2075,6 +2079,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, assert(surface_state.alloc_size); } else { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, cmd_buffer->device->null_surface_state); } break; @@ -2082,10 +2087,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: if (desc->buffer_view) { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, desc->buffer_view->surface_state); assert(surface_state.alloc_size); } else { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, cmd_buffer->device->null_surface_state); } break; @@ -2126,6 +2133,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } else { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, cmd_buffer->device->null_surface_state); } 
break; @@ -2134,10 +2142,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: if (desc->buffer_view) { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, desc->buffer_view->storage_surface_state); assert(surface_state.alloc_size); } else { surface_state = anv_bindless_state_for_binding_table( + cmd_buffer->device, cmd_buffer->device->null_surface_state); } break; diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index b4f9c5e..5ca04da 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -284,6 +284,7 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer) uint32_t *bt_map = cmd_buffer->generation_bt_state.map; bt_map[0] = anv_bindless_state_for_binding_table( + cmd_buffer->device, cmd_buffer->device->null_surface_state).offset + bt_offset; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; @@ -593,11 +594,13 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer * use the same area. 
*/ if (start_generation_batch) { - genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 0, - (struct anv_address) { - .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS, - }, - DYNAMIC_STATE_POOL_SIZE); + struct anv_device *device = cmd_buffer->device; + genX(cmd_buffer_set_binding_for_gfx8_vb_flush)( + cmd_buffer, 0, + (struct anv_address) { + .offset = device->physical->va.dynamic_state_pool.addr, + }, + device->physical->va.dynamic_state_pool.size); } struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index e6498e7..ed37403 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -213,13 +213,17 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) sba.StatelessDataPortAccessMOCS = mocs; sba.SurfaceStateBaseAddress = - (struct anv_address) { .offset = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS }; + (struct anv_address) { .offset = + device->physical->va.internal_surface_state_pool.addr, + }; sba.SurfaceStateMOCS = mocs; sba.SurfaceStateBaseAddressModifyEnable = true; sba.DynamicStateBaseAddress = - (struct anv_address) { .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS }; - sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096; + (struct anv_address) { .offset = + device->physical->va.dynamic_state_pool.addr, + }; + sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096; sba.DynamicStateMOCS = mocs; sba.DynamicStateBaseAddressModifyEnable = true; sba.DynamicStateBufferSizeModifyEnable = true; @@ -231,14 +235,18 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) sba.IndirectObjectBufferSizeModifyEnable = true; sba.InstructionBaseAddress = - (struct anv_address) { .offset = INSTRUCTION_STATE_POOL_MIN_ADDRESS }; - sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096; + (struct anv_address) { .offset = + device->physical->va.instruction_state_pool.addr, + }; + 
sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096; sba.InstructionMOCS = mocs; sba.InstructionBaseAddressModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { .offset = BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS }; + (struct anv_address) { .offset = + device->physical->va.bindless_surface_state_pool.addr, + }; sba.BindlessSurfaceStateSize = (1 << 20) - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true; diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index fd87eb6..9790515 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -175,6 +175,7 @@ libanv_files = files( 'anv_queue.c', 'anv_util.c', 'anv_utrace.c', + 'anv_va.c', 'anv_video.c', 'anv_wsi.c', ) -- 2.7.4