#include "vk_util.h"
#include "util/build_id.h"
+#include "util/debug.h"
#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
return vk_error(NULL, result);
}
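+ /* Allow the pipeline cache to be disabled at runtime (e.g. for
+ * debugging); it defaults to enabled.
+ */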
+ instance->pipeline_cache_enabled =
+ env_var_as_boolean("V3DV_ENABLE_PIPELINE_CACHE", true);
+
glsl_type_singleton_init_or_ref();
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
p_stage->module = src->module;
p_stage->nir = nir_shader_clone(NULL, src->nir);
p_stage->spec_info = src->spec_info;
+ memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
/* Technically we could share the hash_table, but having their own makes
 * destroying the p_stage more straightforward
return 0;
}
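+/* Returns the NIR for a pipeline stage: first searches the pipeline cache
+ * using the stage's shader_sha1, and on a miss compiles the SPIR-V module
+ * and uploads the result so that later lookups can hit.
+ */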
+static nir_shader*
+pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
+ struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache)
+{
+ nir_shader *nir = NULL;
+
+ nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
+ &v3dv_nir_options,
+ p_stage->shader_sha1);
+
+ if (nir) {
+ assert(nir->info.stage == p_stage->stage);
+ return nir;
+ }
+
+ nir = shader_module_compile_to_nir(pipeline->device, p_stage);
+
+ if (nir) {
+ v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
+ p_stage->shader_sha1);
+ return nir;
+ }
+
+ /* FIXME: this shouldn't happen; should we raise an error here? */
+ return NULL;
+}
+
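+/* Computes the sha1 used as the NIR cache key for a stage: a hash over the
+ * module sha1, the entrypoint name, the stage and any specialization info.
+ */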
+static void
+pipeline_hash_shader(const struct v3dv_shader_module *module,
+ const char *entrypoint,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info,
+ unsigned char *sha1_out)
+{
+ struct mesa_sha1 ctx;
+ _mesa_sha1_init(&ctx);
+
+ _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
+ _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
+ _mesa_sha1_update(&ctx, &stage, sizeof(stage));
+ if (spec_info) {
+ _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+ spec_info->mapEntryCount *
+ sizeof(*spec_info->pMapEntries));
+ _mesa_sha1_update(&ctx, spec_info->pData,
+ spec_info->dataSize);
+ }
+
+ _mesa_sha1_final(&ctx, sha1_out);
+}
+
static VkResult
pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator)
{
p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
p_stage->spec_info = sinfo->pSpecializationInfo;
+ pipeline_hash_shader(p_stage->module,
+ p_stage->entrypoint,
+ stage,
+ p_stage->spec_info,
+ p_stage->shader_sha1);
+
pipeline->active_stages |= sinfo->stage;
- /* FIXME: when cache support is in place, first check if for the given
- * spirv module and options, we already have a nir shader.
- */
- p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage);
+ p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
stages[stage] = p_stage;
}
static VkResult
pipeline_init(struct v3dv_pipeline *pipeline,
struct v3dv_device *device,
+ struct v3dv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator)
{
pipeline->primitive_restart =
pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
- result = pipeline_compile_graphics(pipeline, pCreateInfo, pAllocator);
+ result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
/* Caller would already destroy the pipeline, and we didn't allocate any
VkPipeline *pPipeline)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
struct v3dv_pipeline *pipeline;
VkResult result;
if (pipeline == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- result = pipeline_init(pipeline, device,
+ result = pipeline_init(pipeline, device, cache,
pCreateInfo,
pAllocator);
static VkResult
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const VkAllocationCallbacks *alloc)
{
p_stage->entrypoint = sinfo->pName;
p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
p_stage->spec_info = sinfo->pSpecializationInfo;
- p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage);
+
+ pipeline_hash_shader(p_stage->module,
+ p_stage->entrypoint,
+ stage,
+ p_stage->spec_info,
+ p_stage->shader_sha1);
+
+ p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
pipeline->active_stages |= sinfo->stage;
st_nir_opts(p_stage->nir);
static VkResult
compute_pipeline_init(struct v3dv_pipeline *pipeline,
struct v3dv_device *device,
+ struct v3dv_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const VkAllocationCallbacks *alloc)
{
pipeline->device = device;
pipeline->layout = layout;
- VkResult result = pipeline_compile_compute(pipeline, info, alloc);
+ VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
return result;
}
VkPipeline *pPipeline)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
struct v3dv_pipeline *pipeline;
VkResult result;
if (pipeline == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- result = compute_pipeline_init(pipeline, device, pCreateInfo, pAllocator);
+ result = compute_pipeline_init(pipeline, device, cache,
+ pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
v3dv_destroy_pipeline(pipeline, device, pAllocator);
return result;
#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
+#include "util/blob.h"
+#include "nir/nir_serialize.h"
+static const bool dump_stats = false;
+static const bool dump_stats_verbose = false;
+
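+/* Hash table callbacks for tables keyed by raw 20-byte sha1 digests. */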
+static uint32_t
+sha1_hash_func(const void *sha1)
+{
+ return _mesa_hash_data(sha1, 20);
+}
+
+static bool
+sha1_compare_func(const void *sha1_a, const void *sha1_b)
+{
+ return memcmp(sha1_a, sha1_b, 20) == 0;
+}
+
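+/* A NIR shader serialized with nir_serialize(), stored in the nir_cache
+ * hash table with the blob contents inline after the header.
+ */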
+struct serialized_nir {
+ unsigned char sha1_key[20];
+ size_t size;
+ char data[];
+};
+
+static void
+cache_dump_stats(struct v3dv_pipeline_cache *cache)
+{
+ if (!dump_stats_verbose)
+ return;
+
+ fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
+ fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
+ fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);
+}
+
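+/* Serializes the NIR shader and adds it to the cache's NIR table under
+ * sha1_key, unless an entry for that key is already present.
+ */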
+void
+v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ nir_shader *nir,
+ unsigned char sha1_key[20])
+{
+ if (!cache || !cache->nir_cache)
+ return;
+
+ pthread_mutex_lock(&cache->mutex);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(cache->nir_cache, sha1_key);
+ pthread_mutex_unlock(&cache->mutex);
+ if (entry)
+ return;
+
+ struct blob blob;
+ blob_init(&blob);
+
+ nir_serialize(&blob, nir, false);
+ if (blob.out_of_memory) {
+ blob_finish(&blob);
+ return;
+ }
+
+ pthread_mutex_lock(&cache->mutex);
+ /* Because ralloc isn't thread-safe, we have to do all this inside the
+ * lock. We could unlock for the big memcpy but it's probably not worth
+ * the hassle.
+ */
+ entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
+ if (entry) {
+ blob_finish(&blob);
+ pthread_mutex_unlock(&cache->mutex);
+ return;
+ }
+
+ struct serialized_nir *snir =
+ ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
+ if (unlikely(snir == NULL)) {
+ /* Allocation failed: skip caching this shader. */
+ blob_finish(&blob);
+ pthread_mutex_unlock(&cache->mutex);
+ return;
+ }
+ memcpy(snir->sha1_key, sha1_key, 20);
+ snir->size = blob.size;
+ memcpy(snir->data, blob.data, blob.size);
+
+ blob_finish(&blob);
+
+ if (unlikely(dump_stats)) {
+ char sha1buf[41];
+ _mesa_sha1_format(sha1buf, snir->sha1_key);
+ fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
+
+ cache->nir_stats.count++;
+ cache_dump_stats(cache);
+ }
+
+ _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
+
+ pthread_mutex_unlock(&cache->mutex);
+}
+
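+/* Searches the cache's NIR table for sha1_key. On a hit, returns a freshly
+ * deserialized nir_shader owned by the caller; on a miss (or if the cache
+ * is disabled) returns NULL.
+ */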
+nir_shader*
+v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ const nir_shader_compiler_options *nir_options,
+ unsigned char sha1_key[20])
+{
+ if (!cache || !cache->nir_cache)
+ return NULL;
+
+ if (unlikely(dump_stats)) {
+ char sha1buf[41];
+ _mesa_sha1_format(sha1buf, sha1_key);
+
+ fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
+ }
+
+ const struct serialized_nir *snir = NULL;
+
+ pthread_mutex_lock(&cache->mutex);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(cache->nir_cache, sha1_key);
+ if (entry)
+ snir = entry->data;
+ pthread_mutex_unlock(&cache->mutex);
+
+ if (snir) {
+ struct blob_reader blob;
+ blob_reader_init(&blob, snir->data, snir->size);
+
+ /* We use a NULL ralloc context because we want the p_stage to own the
+ * reference to the nir, keeping open the possibility of providing a
+ * shader variant after cache creation.
+ */
+ nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
+ if (blob.overrun) {
+ ralloc_free(nir);
+ } else {
+ if (unlikely(dump_stats)) {
+ cache->nir_stats.hit++;
+ cache_dump_stats(cache);
+ }
+ return nir;
+ }
+ }
+
+ if (unlikely(dump_stats)) {
+ cache->nir_stats.miss++;
+ cache_dump_stats(cache);
+ }
+
+ return NULL;
+}
static void
pipeline_cache_init(struct v3dv_pipeline_cache *cache,
- struct v3dv_device *device)
+ struct v3dv_device *device,
+ bool cache_enabled)
{
cache->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
cache->device = device;
pthread_mutex_init(&cache->mutex, NULL);
+
+ if (cache_enabled) {
+ cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
+ sha1_compare_func);
+ cache->nir_stats.miss = 0;
+ cache->nir_stats.hit = 0;
+ cache->nir_stats.count = 0;
+ } else {
+ cache->nir_cache = NULL;
+ }
}
static void
if (cache == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- pipeline_cache_init(cache, device);
+ pipeline_cache_init(cache, device,
+ device->instance->pipeline_cache_enabled);
if (pCreateInfo->initialDataSize > 0) {
pipeline_cache_load(cache,
pthread_mutex_destroy(&cache->mutex);
+ if (cache->nir_cache) {
+ hash_table_foreach(cache->nir_cache, entry)
+ ralloc_free(entry->data);
+
+ _mesa_hash_table_destroy(cache->nir_cache, NULL);
+ }
+
vk_free2(&device->alloc, pAllocator, cache);
}
struct v3dv_physical_device physicalDevice;
struct vk_debug_report_instance debug_report_callbacks;
+
+ bool pipeline_cache_enabled;
};
/* Tracks wait threads spawned from a single vkQueueSubmit call */
#define V3DV_META_BLIT_CACHE_KEY_SIZE (3 * sizeof(uint32_t))
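+/* Per-table hit/miss/entry counters, only updated when stat dumping is
+ * enabled in v3dv_pipeline_cache.c.
+ */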
+struct v3dv_pipeline_cache_stats {
+ uint32_t miss;
+ uint32_t hit;
+ uint32_t count;
+};
+
struct v3dv_pipeline_cache {
VK_LOADER_DATA _loader_data;
struct v3dv_device *device;
mtx_t mutex;
+
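+ /* Serialized NIR shaders, keyed by shader sha1 (struct serialized_nir). */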
+ struct hash_table *nir_cache;
+ struct v3dv_pipeline_cache_stats nir_stats;
};
struct v3dv_device {
nir_shader *nir;
+ /* Combined sha1 of module, entrypoint, stage and spec_info; used as the
+ * key for the NIR cache.
+ */
+ unsigned char shader_sha1[20];
+
/** A name for this program, so you can track it in shader-db output. */
uint32_t program_id;
/** How many variants of this program were compiled, for shader-db. */
return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}
+void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ nir_shader *nir,
+ unsigned char sha1_key[20]);
+
+nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ const nir_shader_compiler_options *nir_options,
+ unsigned char sha1_key[20]);
+
#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \
\
static inline struct __v3dv_type * \