From 7f1e82306c9b59fe534b9712d85f9324b0bfb5a4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Oct 2021 13:38:19 -0500 Subject: [PATCH] anv: Switch to the new common pipeline cache This patch is intended to be somewhat minimal. There's a lot of cleanup work that can be done but we'll leave that to later patches. Reviewed-by: Connor Abbott Part-of: --- src/intel/vulkan/anv_blorp.c | 28 +- src/intel/vulkan/anv_device.c | 35 +- src/intel/vulkan/anv_pipeline.c | 27 +- src/intel/vulkan/anv_pipeline_cache.c | 637 ++++++---------------------------- src/intel/vulkan/anv_private.h | 67 +--- src/intel/vulkan/genX_pipeline.c | 24 +- 6 files changed, 185 insertions(+), 633 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 9303d0a..4e932ac 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -31,11 +31,8 @@ lookup_blorp_shader(struct blorp_batch *batch, struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; - /* The default cache must be a real cache */ - assert(device->default_pipeline_cache.cache); - struct anv_shader_bin *bin = - anv_pipeline_cache_search(&device->default_pipeline_cache, key, key_size); + anv_pipeline_cache_search(device->blorp_cache, key, key_size); if (!bin) return false; @@ -61,16 +58,13 @@ upload_blorp_shader(struct blorp_batch *batch, uint32_t stage, struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; - /* The blorp cache must be a real cache */ - assert(device->default_pipeline_cache.cache); - struct anv_pipeline_bind_map bind_map = { .surface_count = 0, .sampler_count = 0, }; struct anv_shader_bin *bin = - anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, stage, + anv_pipeline_cache_upload_kernel(device->blorp_cache, stage, key, key_size, kernel, kernel_size, prog_data, prog_data_size, NULL, 0, NULL, &bind_map); @@ -89,9 +83,23 @@ upload_blorp_shader(struct blorp_batch *batch, 
uint32_t stage, return true; } -void +bool anv_device_init_blorp(struct anv_device *device) { + /* BLORP needs its own pipeline cache because, unlike the rest of ANV, it + * won't work at all without the cache. It depends on it for shaders to + * remain resident while it runs. Therefore, we need a special cache just + * for BLORP that's forced to always be enabled. + */ + struct vk_pipeline_cache_create_info pcc_info = { + .force_enable = true, + }; + device->blorp_cache = + vk_pipeline_cache_create(&device->vk, &pcc_info, NULL); + if (device->blorp_cache == NULL) + return false; + + const struct blorp_config config = { .use_mesh_shading = device->physical->vk.supported_extensions.NV_mesh_shader, }; @@ -125,11 +133,13 @@ anv_device_init_blorp(struct anv_device *device) default: unreachable("Unknown hardware generation"); } + return true; } void anv_device_finish_blorp(struct anv_device *device) { + vk_pipeline_cache_destroy(device->blorp_cache, NULL); blorp_finish(&device->blorp); } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 77d77cd..5fc4d45 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -609,9 +609,7 @@ anv_physical_device_init_disk_cache(struct anv_physical_device *device) const uint64_t driver_flags = brw_get_compiler_config_value(device->compiler); - device->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); -#else - device->disk_cache = NULL; + device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags); #endif } @@ -619,8 +617,11 @@ static void anv_physical_device_free_disk_cache(struct anv_physical_device *device) { #ifdef ENABLE_SHADER_CACHE - if (device->disk_cache) - disk_cache_destroy(device->disk_cache); + if (device->vk.disk_cache) { + disk_cache_destroy(device->vk.disk_cache); + device->vk.disk_cache = NULL; + } #else - assert(device->disk_cache == NULL); + assert(device->vk.disk_cache == NULL); #endif } @@ -925,6 +925,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
assert(st_idx <= ARRAY_SIZE(device->sync_types)); device->vk.supported_sync_types = device->sync_types; + device->vk.pipeline_cache_import_ops = anv_cache_import_ops; + device->always_use_bindless = env_var_as_boolean("ANV_ALWAYS_BINDLESS", false); @@ -1134,9 +1136,6 @@ VkResult anv_CreateInstance( instance->physical_devices_enumerated = false; list_inithead(&instance->physical_devices); - instance->pipeline_cache_enabled = - env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); - VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); anv_init_dri_options(instance); @@ -3436,14 +3435,22 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_trivial_batch_bo_and_scratch_pool; - anv_pipeline_cache_init(&device->default_pipeline_cache, device, - true /* cache_enabled */, false /* external_sync */); + struct vk_pipeline_cache_create_info pcc_info = { }; + device->default_pipeline_cache = + vk_pipeline_cache_create(&device->vk, &pcc_info, NULL); + if (!device->default_pipeline_cache) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_trivial_batch_bo_and_scratch_pool; + } result = anv_device_init_rt_shaders(device); if (result != VK_SUCCESS) goto fail_default_pipeline_cache; - anv_device_init_blorp(device); + if (!anv_device_init_blorp(device)) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_rt_shaders; + } anv_device_init_border_colors(device); @@ -3455,8 +3462,10 @@ VkResult anv_CreateDevice( return VK_SUCCESS; + fail_rt_shaders: + anv_device_finish_rt_shaders(device); fail_default_pipeline_cache: - anv_pipeline_cache_finish(&device->default_pipeline_cache); + vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL); fail_trivial_batch_bo_and_scratch_pool: anv_scratch_pool_finish(device, &device->scratch_pool); fail_trivial_batch: @@ -3528,7 +3537,7 @@ void anv_DestroyDevice( anv_device_finish_rt_shaders(device); - anv_pipeline_cache_finish(&device->default_pipeline_cache); + 
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL); #ifdef HAVE_VALGRIND /* We only need to free these to prevent valgrind errors. The backing diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index eb5da15..2004246 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -697,7 +697,7 @@ anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *p static nir_shader * anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, void *mem_ctx, struct anv_pipeline_stage *stage) { @@ -1418,7 +1418,7 @@ anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline) static VkResult anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, const VkPipelineRenderingCreateInfo *rendering_info) { @@ -1586,7 +1586,8 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, */ assert(found < __builtin_popcount(pipeline->active_stages)); - vk_perf(VK_LOG_OBJS(&cache->base), + vk_perf(VK_LOG_OBJS(cache ? &cache->base : + &pipeline->base.device->vk.base), "Found a partial pipeline in the cache. 
This is " "most likely caused by an incomplete pipeline cache " "import or export"); @@ -1903,7 +1904,7 @@ fail: VkResult anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, const struct vk_shader_module *module, const char *entrypoint, @@ -2395,7 +2396,7 @@ vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT * VkResult anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkPipelineRenderingCreateInfo *rendering_info, const VkAllocationCallbacks *alloc) @@ -2568,7 +2569,7 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, static VkResult compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, nir_shader *nir, struct anv_pipeline_stage *stage, struct anv_shader_bin **shader_out, @@ -2777,7 +2778,7 @@ anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline, static bool anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *info, struct anv_pipeline_stage *stages, uint32_t *stack_max) @@ -2820,7 +2821,7 @@ anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline, static VkResult anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *info) { const struct intel_device_info *devinfo = &pipeline->base.device->info; @@ -3040,7 +3041,7 @@ anv_device_init_rt_shaders(struct anv_device *device) }, }; device->rt_trampoline = - anv_device_search_for_kernel(device, 
&device->default_pipeline_cache, + anv_device_search_for_kernel(device, device->default_pipeline_cache, &trampoline_key, sizeof(trampoline_key), &cache_hit); if (device->rt_trampoline == NULL) { @@ -3070,7 +3071,7 @@ anv_device_init_rt_shaders(struct anv_device *device) brw_compile_cs(device->physical->compiler, tmp_ctx, ¶ms); device->rt_trampoline = - anv_device_upload_kernel(device, &device->default_pipeline_cache, + anv_device_upload_kernel(device, device->default_pipeline_cache, MESA_SHADER_COMPUTE, &trampoline_key, sizeof(trampoline_key), tramp_data, @@ -3092,7 +3093,7 @@ anv_device_init_rt_shaders(struct anv_device *device) .name = "rt-trivial-ret", }; device->rt_trivial_return = - anv_device_search_for_kernel(device, &device->default_pipeline_cache, + anv_device_search_for_kernel(device, device->default_pipeline_cache, &return_key, sizeof(return_key), &cache_hit); if (device->rt_trivial_return == NULL) { @@ -3118,7 +3119,7 @@ anv_device_init_rt_shaders(struct anv_device *device) brw_compile_bs(device->physical->compiler, tmp_ctx, ¶ms); device->rt_trivial_return = - anv_device_upload_kernel(device, &device->default_pipeline_cache, + anv_device_upload_kernel(device, device->default_pipeline_cache, MESA_SHADER_CALLABLE, &return_key, sizeof(return_key), return_data, return_prog_data.base.program_size, @@ -3148,7 +3149,7 @@ anv_device_finish_rt_shaders(struct anv_device *device) VkResult anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *alloc) { diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 0b78f1b..31c2d8f 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -31,6 +31,39 @@ #include "nir/nir_xfb_info.h" #include "vulkan/util/vk_util.h" +static bool 
+anv_shader_bin_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob); + +struct vk_pipeline_cache_object * +anv_shader_bin_deserialize(struct vk_device *device, + const void *key_data, size_t key_size, + struct blob_reader *blob); + +static void +anv_shader_bin_destroy(struct vk_pipeline_cache_object *object) +{ + struct anv_device *device = + container_of(object->device, struct anv_device, vk); + struct anv_shader_bin *shader = + container_of(object, struct anv_shader_bin, base); + + anv_state_pool_free(&device->instruction_state_pool, shader->kernel); + vk_pipeline_cache_object_finish(&shader->base); + vk_free(&device->vk.alloc, shader); +} + +static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = { + .serialize = anv_shader_bin_serialize, + .deserialize = anv_shader_bin_deserialize, + .destroy = anv_shader_bin_destroy, +}; + +const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = { + &anv_shader_bin_ops, + NULL +}; + struct anv_shader_bin * anv_shader_bin_create(struct anv_device *device, gl_shader_stage stage, @@ -44,8 +77,7 @@ anv_shader_bin_create(struct anv_device *device, { VK_MULTIALLOC(ma); VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1); - VK_MULTIALLOC_DECL_SIZE(&ma, struct anv_shader_bin_key, key, - sizeof(*key) + key_size); + VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size); VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data, prog_data_size); VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs, @@ -65,14 +97,12 @@ anv_shader_bin_create(struct anv_device *device, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) return NULL; - shader->ref_cnt = 1; + memcpy(obj_key_data, key_data, key_size); + vk_pipeline_cache_object_init(&device->vk, &shader->base, + &anv_shader_bin_ops, obj_key_data, key_size); shader->stage = stage; - key->size = key_size; - memcpy(key->data, key_data, key_size); - shader->key = key; - shader->kernel = 
anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64); memcpy(shader->kernel.map, kernel_data, kernel_size); @@ -149,23 +179,14 @@ anv_shader_bin_create(struct anv_device *device, return shader; } -void -anv_shader_bin_destroy(struct anv_device *device, - struct anv_shader_bin *shader) -{ - assert(shader->ref_cnt == 0); - anv_state_pool_free(&device->instruction_state_pool, shader->kernel); - vk_free(&device->vk.alloc, shader); -} - static bool -anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader, - struct blob *blob) +anv_shader_bin_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob) { - blob_write_uint32(blob, shader->stage); + struct anv_shader_bin *shader = + container_of(object, struct anv_shader_bin, base); - blob_write_uint32(blob, shader->key->size); - blob_write_bytes(blob, shader->key->data, shader->key->size); + blob_write_uint32(blob, shader->stage); blob_write_uint32(blob, shader->kernel_size); blob_write_bytes(blob, shader->kernel.map, shader->kernel_size); @@ -209,14 +230,15 @@ anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader, return !blob->out_of_memory; } -static struct anv_shader_bin * -anv_shader_bin_create_from_blob(struct anv_device *device, - struct blob_reader *blob) +struct vk_pipeline_cache_object * +anv_shader_bin_deserialize(struct vk_device *vk_device, + const void *key_data, size_t key_size, + struct blob_reader *blob) { - gl_shader_stage stage = blob_read_uint32(blob); + struct anv_device *device = + container_of(vk_device, struct anv_device, vk); - uint32_t key_size = blob_read_uint32(blob); - const void *key_data = blob_read_bytes(blob, key_size); + gl_shader_stage stage = blob_read_uint32(blob); uint32_t kernel_size = blob_read_uint32(blob); const void *kernel_data = blob_read_bytes(blob, kernel_size); @@ -259,205 +281,33 @@ anv_shader_bin_create_from_blob(struct anv_device *device, if (blob->overrun) return NULL; - return anv_shader_bin_create(device, stage, - 
key_data, key_size, - kernel_data, kernel_size, - &prog_data.base, prog_data_size, - stats, num_stats, xfb_info, &bind_map); -} - -/* Remaining work: - * - * - Compact binding table layout so it's tight and not dependent on - * descriptor set layout. - * - * - Review prog_data struct for size and cacheability: struct - * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 - * bit quantities etc; use bit fields for all bools, eg dual_src_blend. - */ - -static uint32_t -shader_bin_key_hash_func(const void *void_key) -{ - const struct anv_shader_bin_key *key = void_key; - return _mesa_hash_data(key->data, key->size); -} - -static bool -shader_bin_key_compare_func(const void *void_a, const void *void_b) -{ - const struct anv_shader_bin_key *a = void_a, *b = void_b; - if (a->size != b->size) - return false; - - return memcmp(a->data, b->data, a->size) == 0; -} - -static uint32_t -sha1_hash_func(const void *sha1) -{ - return _mesa_hash_data(sha1, 20); -} - -static bool -sha1_compare_func(const void *sha1_a, const void *sha1_b) -{ - return memcmp(sha1_a, sha1_b, 20) == 0; -} - -void -anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device, - bool cache_enabled, - bool external_sync) -{ - vk_object_base_init(&device->vk, &cache->base, - VK_OBJECT_TYPE_PIPELINE_CACHE); - cache->device = device; - cache->external_sync = external_sync; - pthread_mutex_init(&cache->mutex, NULL); - - if (cache_enabled) { - cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, - shader_bin_key_compare_func); - cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func, - sha1_compare_func); - } else { - cache->cache = NULL; - cache->nir_cache = NULL; - } -} - -void -anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) -{ - pthread_mutex_destroy(&cache->mutex); - - if (cache->cache) { - /* This is a bit unfortunate. 
In order to keep things from randomly - * going away, the shader cache has to hold a reference to all shader - * binaries it contains. We unref them when we destroy the cache. - */ - hash_table_foreach(cache->cache, entry) - anv_shader_bin_unref(cache->device, entry->data); - - _mesa_hash_table_destroy(cache->cache, NULL); - } - - if (cache->nir_cache) { - hash_table_foreach(cache->nir_cache, entry) - ralloc_free(entry->data); - - _mesa_hash_table_destroy(cache->nir_cache, NULL); - } - - vk_object_base_finish(&cache->base); -} - -static struct anv_shader_bin * -anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache, - const void *key_data, uint32_t key_size) -{ - uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))]; - struct anv_shader_bin_key *key = (void *)vla; - key->size = key_size; - memcpy(key->data, key_data, key_size); - - struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key); - if (entry) - return entry->data; - else + struct anv_shader_bin *shader = + anv_shader_bin_create(device, stage, + key_data, key_size, + kernel_data, kernel_size, + &prog_data.base, prog_data_size, + stats, num_stats, xfb_info, &bind_map); + if (shader == NULL) return NULL; -} -static inline void -anv_cache_lock(struct anv_pipeline_cache *cache) -{ - if (!cache->external_sync) - pthread_mutex_lock(&cache->mutex); -} - -static inline void -anv_cache_unlock(struct anv_pipeline_cache *cache) -{ - if (!cache->external_sync) - pthread_mutex_unlock(&cache->mutex); + return &shader->base; } struct anv_shader_bin * -anv_pipeline_cache_search(struct anv_pipeline_cache *cache, +anv_pipeline_cache_search(struct vk_pipeline_cache *cache, const void *key_data, uint32_t key_size) { - if (!cache->cache) + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_lookup_object(cache, key_data, key_size, + &anv_shader_bin_ops, NULL); + if (object == NULL) return NULL; - anv_cache_lock(cache); - - struct anv_shader_bin *shader = - 
anv_pipeline_cache_search_locked(cache, key_data, key_size); - - anv_cache_unlock(cache); - - /* We increment refcount before handing it to the caller */ - if (shader) - anv_shader_bin_ref(shader); - - return shader; -} - -static void -anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache, - struct anv_shader_bin *bin) -{ - if (!cache->cache) - return; - - anv_cache_lock(cache); - - struct hash_entry *entry = _mesa_hash_table_search(cache->cache, bin->key); - if (entry == NULL) { - /* Take a reference for the cache */ - anv_shader_bin_ref(bin); - _mesa_hash_table_insert(cache->cache, bin->key, bin); - } - - anv_cache_unlock(cache); -} - -static struct anv_shader_bin * -anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache, - gl_shader_stage stage, - const void *key_data, uint32_t key_size, - const void *kernel_data, - uint32_t kernel_size, - const struct brw_stage_prog_data *prog_data, - uint32_t prog_data_size, - const struct brw_compile_stats *stats, - uint32_t num_stats, - const nir_xfb_info *xfb_info, - const struct anv_pipeline_bind_map *bind_map) -{ - struct anv_shader_bin *shader = - anv_pipeline_cache_search_locked(cache, key_data, key_size); - if (shader) - return shader; - - struct anv_shader_bin *bin = - anv_shader_bin_create(cache->device, stage, - key_data, key_size, - kernel_data, kernel_size, - prog_data, prog_data_size, - stats, num_stats, xfb_info, bind_map); - if (!bin) - return NULL; - - _mesa_hash_table_insert(cache->cache, bin->key, bin); - - return bin; + return container_of(object, struct anv_shader_bin, base); } struct anv_shader_bin * -anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, +anv_pipeline_cache_upload_kernel(struct vk_pipeline_cache *cache, gl_shader_stage stage, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, @@ -468,257 +318,48 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const nir_xfb_info *xfb_info, const struct 
anv_pipeline_bind_map *bind_map) { - if (cache->cache) { - anv_cache_lock(cache); - - struct anv_shader_bin *bin = - anv_pipeline_cache_add_shader_locked(cache, stage, key_data, key_size, - kernel_data, kernel_size, - prog_data, prog_data_size, - stats, num_stats, - xfb_info, bind_map); - - anv_cache_unlock(cache); - - /* We increment refcount before handing it to the caller */ - if (bin) - anv_shader_bin_ref(bin); - - return bin; - } else { - /* In this case, we're not caching it so the caller owns it entirely */ - return anv_shader_bin_create(cache->device, stage, - key_data, key_size, - kernel_data, kernel_size, - prog_data, prog_data_size, - stats, num_stats, - xfb_info, bind_map); - } -} - -static void -anv_pipeline_cache_load(struct anv_pipeline_cache *cache, - const void *data, size_t size) -{ - struct anv_device *device = cache->device; - struct anv_physical_device *pdevice = device->physical; - - if (cache->cache == NULL) - return; - - struct blob_reader blob; - blob_reader_init(&blob, data, size); - - struct vk_pipeline_cache_header header; - blob_copy_bytes(&blob, &header, sizeof(header)); - uint32_t count = blob_read_uint32(&blob); - if (blob.overrun) - return; - - if (header.header_size < sizeof(header)) - return; - if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) - return; - if (header.vendor_id != 0x8086) - return; - if (header.device_id != device->info.pci_device_id) - return; - if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0) - return; - - for (uint32_t i = 0; i < count; i++) { - struct anv_shader_bin *bin = - anv_shader_bin_create_from_blob(device, &blob); - if (!bin) - break; - _mesa_hash_table_insert(cache->cache, bin->key, bin); - } -} - -VkResult anv_CreatePipelineCache( - VkDevice _device, - const VkPipelineCacheCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineCache* pPipelineCache) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_cache *cache; - 
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); - - cache = vk_alloc2(&device->vk.alloc, pAllocator, - sizeof(*cache), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cache == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_pipeline_cache_init(cache, device, - device->physical->instance->pipeline_cache_enabled, - pCreateInfo->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT); - - if (pCreateInfo->initialDataSize > 0) - anv_pipeline_cache_load(cache, - pCreateInfo->pInitialData, - pCreateInfo->initialDataSize); - - *pPipelineCache = anv_pipeline_cache_to_handle(cache); - - return VK_SUCCESS; -} - -void anv_DestroyPipelineCache( - VkDevice _device, - VkPipelineCache _cache, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - if (!cache) - return; - - anv_pipeline_cache_finish(cache); - - vk_free2(&device->vk.alloc, pAllocator, cache); -} - -VkResult anv_GetPipelineCacheData( - VkDevice _device, - VkPipelineCache _cache, - size_t* pDataSize, - void* pData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - struct blob blob; - if (pData) { - blob_init_fixed(&blob, pData, *pDataSize); - } else { - blob_init_fixed(&blob, NULL, SIZE_MAX); - } - - struct vk_pipeline_cache_header header = { - .header_size = sizeof(struct vk_pipeline_cache_header), - .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, - .vendor_id = 0x8086, - .device_id = device->info.pci_device_id, - }; - memcpy(header.uuid, device->physical->pipeline_cache_uuid, VK_UUID_SIZE); - blob_write_bytes(&blob, &header, sizeof(header)); - - uint32_t count = 0; - intptr_t count_offset = blob_reserve_uint32(&blob); - if (count_offset < 0) { - *pDataSize = 0; - blob_finish(&blob); - return VK_INCOMPLETE; - } - - VkResult result = VK_SUCCESS; - if (cache->cache) { - 
hash_table_foreach(cache->cache, entry) { - struct anv_shader_bin *shader = entry->data; - - size_t save_size = blob.size; - if (!anv_shader_bin_write_to_blob(shader, &blob)) { - /* If it fails reset to the previous size and bail */ - blob.size = save_size; - result = VK_INCOMPLETE; - break; - } - - count++; - } - } - - blob_overwrite_uint32(&blob, count_offset, count); - - *pDataSize = blob.size; - - blob_finish(&blob); - - return result; -} - -VkResult anv_MergePipelineCaches( - VkDevice _device, - VkPipelineCache destCache, - uint32_t srcCacheCount, - const VkPipelineCache* pSrcCaches) -{ - ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + struct anv_device *device = + container_of(cache->base.device, struct anv_device, vk); - if (!dst->cache) - return VK_SUCCESS; - - for (uint32_t i = 0; i < srcCacheCount; i++) { - ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); - if (!src->cache) - continue; - - hash_table_foreach(src->cache, entry) { - struct anv_shader_bin *bin = entry->data; - assert(bin); - - if (_mesa_hash_table_search(dst->cache, bin->key)) - continue; + struct anv_shader_bin *shader = + anv_shader_bin_create(device, stage, + key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, + stats, num_stats, + xfb_info, bind_map); + if (shader == NULL) + return NULL; - anv_shader_bin_ref(bin); - _mesa_hash_table_insert(dst->cache, bin->key, bin); - } - } + struct vk_pipeline_cache_object *cached = + vk_pipeline_cache_add_object(cache, &shader->base); - return VK_SUCCESS; + return container_of(cached, struct anv_shader_bin, base); } struct anv_shader_bin * anv_device_search_for_kernel(struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const void *key_data, uint32_t key_size, bool *user_cache_hit) { - struct anv_shader_bin *bin; - *user_cache_hit = false; - if (cache) { - bin = anv_pipeline_cache_search(cache, key_data, key_size); - if (bin) { - *user_cache_hit = cache != 
&device->default_pipeline_cache; - return bin; - } - } + if (cache == NULL) + return NULL; -#ifdef ENABLE_SHADER_CACHE - struct disk_cache *disk_cache = device->physical->disk_cache; - if (disk_cache && device->physical->instance->pipeline_cache_enabled) { - cache_key cache_key; - disk_cache_compute_key(disk_cache, key_data, key_size, cache_key); - - size_t buffer_size; - uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size); - if (buffer) { - struct blob_reader blob; - blob_reader_init(&blob, buffer, buffer_size); - bin = anv_shader_bin_create_from_blob(device, &blob); - free(buffer); - - if (bin) { - if (cache) - anv_pipeline_cache_add_shader_bin(cache, bin); - return bin; - } - } - } -#endif + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_lookup_object(cache, key_data, key_size, + &anv_shader_bin_ops, user_cache_hit); + if (object == NULL) + return NULL; - return NULL; + return container_of(object, struct anv_shader_bin, base); } struct anv_shader_bin * anv_device_upload_kernel(struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, gl_shader_stage stage, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, @@ -747,109 +388,33 @@ anv_device_upload_kernel(struct anv_device *device, if (bin == NULL) return NULL; -#ifdef ENABLE_SHADER_CACHE - struct disk_cache *disk_cache = device->physical->disk_cache; - if (disk_cache) { - struct blob binary; - blob_init(&binary); - if (anv_shader_bin_write_to_blob(bin, &binary)) { - cache_key cache_key; - disk_cache_compute_key(disk_cache, key_data, key_size, cache_key); - - disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL); - } - - blob_finish(&binary); - } -#endif - return bin; } -struct serialized_nir { - unsigned char sha1_key[20]; - size_t size; - char data[0]; -}; +#define SHA1_KEY_SIZE 20 struct nir_shader * anv_device_search_for_nir(struct anv_device *device, - struct anv_pipeline_cache *cache, 
+ struct vk_pipeline_cache *cache, const nir_shader_compiler_options *nir_options, - unsigned char sha1_key[20], + unsigned char sha1_key[SHA1_KEY_SIZE], void *mem_ctx) { - if (cache && cache->nir_cache) { - const struct serialized_nir *snir = NULL; - - anv_cache_lock(cache); - struct hash_entry *entry = - _mesa_hash_table_search(cache->nir_cache, sha1_key); - if (entry) - snir = entry->data; - anv_cache_unlock(cache); - - if (snir) { - struct blob_reader blob; - blob_reader_init(&blob, snir->data, snir->size); - - nir_shader *nir = nir_deserialize(mem_ctx, nir_options, &blob); - if (blob.overrun) { - ralloc_free(nir); - } else { - return nir; - } - } - } + if (cache == NULL) + return NULL; - return NULL; + return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE, + nir_options, NULL, mem_ctx); } void anv_device_upload_nir(struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const struct nir_shader *nir, - unsigned char sha1_key[20]) + unsigned char sha1_key[SHA1_KEY_SIZE]) { - if (cache && cache->nir_cache) { - anv_cache_lock(cache); - struct hash_entry *entry = - _mesa_hash_table_search(cache->nir_cache, sha1_key); - anv_cache_unlock(cache); - if (entry) - return; - - struct blob blob; - blob_init(&blob); - - nir_serialize(&blob, nir, false); - if (blob.out_of_memory) { - blob_finish(&blob); - return; - } - - anv_cache_lock(cache); - /* Because ralloc isn't thread-safe, we have to do all this inside the - * lock. We could unlock for the big memcpy but it's probably not worth - * the hassle. 
- */ - entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); - if (entry) { - blob_finish(&blob); - anv_cache_unlock(cache); - return; - } - - struct serialized_nir *snir = - ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size); - memcpy(snir->sha1_key, sha1_key, 20); - snir->size = blob.size; - memcpy(snir->data, blob.data, blob.size); - - blob_finish(&blob); - - _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir); - - anv_cache_unlock(cache); - } + if (cache == NULL) + return; + + vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir); } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f204420..96b2105 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -76,6 +76,7 @@ #include "vk_framebuffer.h" #include "vk_image.h" #include "vk_instance.h" +#include "vk_pipeline_cache.h" #include "vk_physical_device.h" #include "vk_shader_module.h" #include "vk_sync.h" @@ -1047,8 +1048,6 @@ struct anv_physical_device { struct vk_sync_timeline_type sync_timeline_type; const struct vk_sync_type * sync_types[4]; - struct disk_cache * disk_cache; - struct wsi_device wsi_device; int local_fd; bool has_local; @@ -1078,8 +1077,6 @@ struct anv_instance { bool physical_devices_enumerated; struct list_head physical_devices; - bool pipeline_cache_enabled; - struct driOptionCache dri_options; struct driOptionCache available_dri_options; }; @@ -1104,32 +1101,16 @@ struct anv_queue { struct intel_ds_queue * ds; }; -struct anv_pipeline_cache { - struct vk_object_base base; - struct anv_device * device; - pthread_mutex_t mutex; - - struct hash_table * nir_cache; - - struct hash_table * cache; - - bool external_sync; -}; - struct nir_xfb_info; struct anv_pipeline_bind_map; -void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device, - bool cache_enabled, - bool external_sync); -void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); +extern const struct 
vk_pipeline_cache_object_ops *const anv_cache_import_ops[2]; struct anv_shader_bin * -anv_pipeline_cache_search(struct anv_pipeline_cache *cache, +anv_pipeline_cache_search(struct vk_pipeline_cache *cache, const void *key, uint32_t key_size); struct anv_shader_bin * -anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, +anv_pipeline_cache_upload_kernel(struct vk_pipeline_cache *cache, gl_shader_stage stage, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, @@ -1142,13 +1123,13 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, struct anv_shader_bin * anv_device_search_for_kernel(struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const void *key_data, uint32_t key_size, bool *user_cache_bit); struct anv_shader_bin * anv_device_upload_kernel(struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, gl_shader_stage stage, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, @@ -1164,14 +1145,14 @@ struct nir_shader_compiler_options; struct nir_shader * anv_device_search_for_nir(struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const struct nir_shader_compiler_options *nir_options, unsigned char sha1_key[20], void *mem_ctx); void anv_device_upload_nir(struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const struct nir_shader *nir, unsigned char sha1_key[20]); @@ -1221,7 +1202,8 @@ struct anv_device { struct anv_bo * trivial_batch_bo; struct anv_state null_surface_state; - struct anv_pipeline_cache default_pipeline_cache; + struct vk_pipeline_cache * default_pipeline_cache; + struct vk_pipeline_cache * blorp_cache; struct blorp_context blorp; struct anv_state border_colors; @@ -1342,7 +1324,7 @@ anv_mocs(const struct anv_device *device, return isl_mocs(&device->isl_dev, usage, bo && 
bo->is_external); } -void anv_device_init_blorp(struct anv_device *device); +bool anv_device_init_blorp(struct anv_device *device); void anv_device_finish_blorp(struct anv_device *device); enum anv_bo_alloc_flags { @@ -3251,18 +3233,11 @@ struct anv_pipeline_bind_map { struct anv_push_range push_ranges[4]; }; -struct anv_shader_bin_key { - uint32_t size; - uint8_t data[0]; -}; - struct anv_shader_bin { - uint32_t ref_cnt; + struct vk_pipeline_cache_object base; gl_shader_stage stage; - const struct anv_shader_bin_key *key; - struct anv_state kernel; uint32_t kernel_size; @@ -3288,22 +3263,16 @@ anv_shader_bin_create(struct anv_device *device, const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); -void -anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader); - static inline void anv_shader_bin_ref(struct anv_shader_bin *shader) { - assert(shader && shader->ref_cnt >= 1); - p_atomic_inc(&shader->ref_cnt); + vk_pipeline_cache_object_ref(&shader->base); } static inline void anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) { - assert(shader && shader->ref_cnt >= 1); - if (p_atomic_dec_zero(&shader->ref_cnt)) - anv_shader_bin_destroy(device, shader); + vk_pipeline_cache_object_unref(&shader->base); } #define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \ @@ -3611,14 +3580,14 @@ anv_pipeline_finish(struct anv_pipeline *pipeline, VkResult anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkPipelineRenderingCreateInfo *rendering_info, const VkAllocationCallbacks *alloc); VkResult anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, const struct vk_shader_module *module, const char *entrypoint, @@ 
-3627,7 +3596,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, VkResult anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *alloc); @@ -4595,8 +4564,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW); -VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache, - VK_OBJECT_TYPE_PIPELINE_CACHE) VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE) VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout, diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 4d9e0a4..91aa50b 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -2780,7 +2780,7 @@ emit_mesh_state(struct anv_graphics_pipeline *pipeline) static VkResult genX(graphics_pipeline_create)( VkDevice _device, - struct anv_pipeline_cache * cache, + struct vk_pipeline_cache * cache, const VkGraphicsPipelineCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) @@ -2792,8 +2792,8 @@ genX(graphics_pipeline_create)( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); /* Use the default pipeline cache if none is specified */ - if (cache == NULL && device->physical->instance->pipeline_cache_enabled) - cache = &device->default_pipeline_cache; + if (cache == NULL) + cache = device->default_pipeline_cache; pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -3088,7 +3088,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline, static 
VkResult compute_pipeline_create( VkDevice _device, - struct anv_pipeline_cache * cache, + struct vk_pipeline_cache * cache, const VkComputePipelineCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) @@ -3100,8 +3100,8 @@ compute_pipeline_create( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); /* Use the default pipeline cache if none is specified */ - if (cache == NULL && device->physical->instance->pipeline_cache_enabled) - cache = &device->default_pipeline_cache; + if (cache == NULL) + cache = device->default_pipeline_cache; pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -3147,7 +3147,7 @@ VkResult genX(CreateGraphicsPipelines)( const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines) { - ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); + VK_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); VkResult result = VK_SUCCESS; @@ -3186,7 +3186,7 @@ VkResult genX(CreateComputePipelines)( const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines) { - ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); + VK_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); VkResult result = VK_SUCCESS; @@ -3234,7 +3234,7 @@ assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo static VkResult ray_tracing_pipeline_create( VkDevice _device, - struct anv_pipeline_cache * cache, + struct vk_pipeline_cache * cache, const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) @@ -3245,8 +3245,8 @@ ray_tracing_pipeline_create( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR); /* Use the default pipeline cache if none is specified */ - if (cache == NULL && device->physical->instance->pipeline_cache_enabled) - cache = &device->default_pipeline_cache; + if (cache == NULL) + 
cache = device->default_pipeline_cache; VK_MULTIALLOC(ma); VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1); @@ -3370,7 +3370,7 @@ genX(CreateRayTracingPipelinesKHR)( const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines) { - ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); + VK_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); VkResult result = VK_SUCCESS; -- 2.7.4