#include "vk_util.h"
#include "util/build_id.h"
+#include "util/debug.h"
#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
return vk_error(NULL, result);
}
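+ /* Allow the pipeline cache to be disabled at runtime (e.g. for
+ * debugging); it defaults to enabled.
+ */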
+ instance->pipeline_cache_enabled =
+ env_var_as_boolean("V3DV_ENABLE_PIPELINE_CACHE", true);
+
glsl_type_singleton_init_or_ref();
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
p_stage->module = src->module;
p_stage->nir = nir_shader_clone(NULL, src->nir);
p_stage->spec_info = src->spec_info;
+ memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
/* Technically we could share the hash_table, but having their own makes
 * destroying the p_stage more straightforward
return 0;
}
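+/* Returns the NIR for a pipeline stage: first searches the pipeline cache
+ * using the stage's shader_sha1, and on a miss compiles the SPIR-V module
+ * and uploads the result so that later lookups can hit.
+ */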
+static nir_shader*
+pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
+ struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache)
+{
+ nir_shader *nir = NULL;
+
+ nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
+ &v3dv_nir_options,
+ p_stage->shader_sha1);
+
+ if (nir) {
+ assert(nir->info.stage == p_stage->stage);
+ return nir;
+ }
+
+ nir = shader_module_compile_to_nir(pipeline->device, p_stage);
+
+ if (nir) {
+ v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
+ p_stage->shader_sha1);
+ return nir;
+ }
+
+ /* FIXME: this shouldn't happen; should we raise an error here? */
+ return NULL;
+}
+
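+/* Computes the sha1 used as the NIR cache key for a stage: a hash over the
+ * module sha1, the entrypoint name, the stage and any specialization info.
+ */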
+static void
+pipeline_hash_shader(const struct v3dv_shader_module *module,
+ const char *entrypoint,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info,
+ unsigned char *sha1_out)
+{
+ struct mesa_sha1 ctx;
+ _mesa_sha1_init(&ctx);
+
+ _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
+ _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
+ _mesa_sha1_update(&ctx, &stage, sizeof(stage));
+ if (spec_info) {
+ _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+ spec_info->mapEntryCount *
+ sizeof(*spec_info->pMapEntries));
+ _mesa_sha1_update(&ctx, spec_info->pData,
+ spec_info->dataSize);
+ }
+
+ _mesa_sha1_final(&ctx, sha1_out);
+}
+
static VkResult
pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator)
{
p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
p_stage->spec_info = sinfo->pSpecializationInfo;
+ pipeline_hash_shader(p_stage->module,
+ p_stage->entrypoint,
+ stage,
+ p_stage->spec_info,
+ p_stage->shader_sha1);
+
pipeline->active_stages |= sinfo->stage;
- /* FIXME: when cache support is in place, first check if for the given
- * spirv module and options, we already have a nir shader.
- */
- p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage);
+ p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
stages[stage] = p_stage;
}
static VkResult
pipeline_init(struct v3dv_pipeline *pipeline,
struct v3dv_device *device,
+ struct v3dv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator)
{
pipeline->primitive_restart =
pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
- result = pipeline_compile_graphics(pipeline, pCreateInfo, pAllocator);
+ result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
/* Caller would already destroy the pipeline, and we didn't allocate any
VkPipeline *pPipeline)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
struct v3dv_pipeline *pipeline;
VkResult result;
if (pipeline == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- result = pipeline_init(pipeline, device,
+ result = pipeline_init(pipeline, device, cache,
pCreateInfo,
pAllocator);
static VkResult
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const VkAllocationCallbacks *alloc)
{
p_stage->entrypoint = sinfo->pName;
p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
p_stage->spec_info = sinfo->pSpecializationInfo;
- p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage);
+
+ pipeline_hash_shader(p_stage->module,
+ p_stage->entrypoint,
+ stage,
+ p_stage->spec_info,
+ p_stage->shader_sha1);
+
+ p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
pipeline->active_stages |= sinfo->stage;
st_nir_opts(p_stage->nir);
static VkResult
compute_pipeline_init(struct v3dv_pipeline *pipeline,
struct v3dv_device *device,
+ struct v3dv_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const VkAllocationCallbacks *alloc)
{
pipeline->device = device;
pipeline->layout = layout;
- VkResult result = pipeline_compile_compute(pipeline, info, alloc);
+ VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
return result;
}
VkPipeline *pPipeline)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
struct v3dv_pipeline *pipeline;
VkResult result;
if (pipeline == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- result = compute_pipeline_init(pipeline, device, pCreateInfo, pAllocator);
+ result = compute_pipeline_init(pipeline, device, cache,
+ pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
v3dv_destroy_pipeline(pipeline, device, pAllocator);
return result;
#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
+#include "util/blob.h"
+#include "nir/nir_serialize.h"
+static const bool dump_stats = false;
+static const bool dump_stats_verbose = false;
+
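+/* Hash table callbacks for tables keyed by raw 20-byte sha1 digests. */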
+static uint32_t
+sha1_hash_func(const void *sha1)
+{
+ return _mesa_hash_data(sha1, 20);
+}
+
+static bool
+sha1_compare_func(const void *sha1_a, const void *sha1_b)
+{
+ return memcmp(sha1_a, sha1_b, 20) == 0;
+}
+
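+/* A NIR shader serialized with nir_serialize(), stored in the nir_cache
+ * hash table with the blob contents inline after the header.
+ */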
+struct serialized_nir {
+ unsigned char sha1_key[20];
+ size_t size;
+ char data[];
+};
+
+static void
+cache_dump_stats(struct v3dv_pipeline_cache *cache)
+{
+ if (!dump_stats_verbose)
+ return;
+
+ fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
+ fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
+ fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);
+}
+
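+/* Serializes the NIR shader and adds it to the cache's NIR table under
+ * sha1_key, unless an entry for that key is already present.
+ */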
+void
+v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ nir_shader *nir,
+ unsigned char sha1_key[20])
+{
+ if (!cache || !cache->nir_cache)
+ return;
+
+ pthread_mutex_lock(&cache->mutex);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(cache->nir_cache, sha1_key);
+ pthread_mutex_unlock(&cache->mutex);
+ if (entry)
+ return;
+
+ struct blob blob;
+ blob_init(&blob);
+
+ nir_serialize(&blob, nir, false);
+ if (blob.out_of_memory) {
+ blob_finish(&blob);
+ return;
+ }
+
+ pthread_mutex_lock(&cache->mutex);
+ /* Because ralloc isn't thread-safe, we have to do all this inside the
+ * lock. We could unlock for the big memcpy but it's probably not worth
+ * the hassle.
+ */
+ entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
+ if (entry) {
+ blob_finish(&blob);
+ pthread_mutex_unlock(&cache->mutex);
+ return;
+ }
+
+ struct serialized_nir *snir =
+ ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
+ if (unlikely(snir == NULL)) {
+ /* Allocation failed: skip caching this shader. */
+ blob_finish(&blob);
+ pthread_mutex_unlock(&cache->mutex);
+ return;
+ }
+ memcpy(snir->sha1_key, sha1_key, 20);
+ snir->size = blob.size;
+ memcpy(snir->data, blob.data, blob.size);
+
+ blob_finish(&blob);
+
+ if (unlikely(dump_stats)) {
+ char sha1buf[41];
+ _mesa_sha1_format(sha1buf, snir->sha1_key);
+ fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
+
+ cache->nir_stats.count++;
+ cache_dump_stats(cache);
+ }
+
+ _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
+
+ pthread_mutex_unlock(&cache->mutex);
+}
+
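+/* Searches the cache's NIR table for sha1_key. On a hit, returns a freshly
+ * deserialized nir_shader owned by the caller; on a miss (or if the cache
+ * is disabled) returns NULL.
+ */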
+nir_shader*
+v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ const nir_shader_compiler_options *nir_options,
+ unsigned char sha1_key[20])
+{
+ if (!cache || !cache->nir_cache)
+ return NULL;
+
+ if (unlikely(dump_stats)) {
+ char sha1buf[41];
+ _mesa_sha1_format(sha1buf, sha1_key);
+
+ fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
+ }
+
+ const struct serialized_nir *snir = NULL;
+
+ pthread_mutex_lock(&cache->mutex);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(cache->nir_cache, sha1_key);
+ if (entry)
+ snir = entry->data;
+ pthread_mutex_unlock(&cache->mutex);
+
+ if (snir) {
+ struct blob_reader blob;
+ blob_reader_init(&blob, snir->data, snir->size);
+
+ /* We use a NULL ralloc context because we want the p_stage to own the
+ * reference to the nir, keeping open the possibility of providing a
+ * shader variant after cache creation.
+ */
+ nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
+ if (blob.overrun) {
+ ralloc_free(nir);
+ } else {
+ if (unlikely(dump_stats)) {
+ cache->nir_stats.hit++;
+ cache_dump_stats(cache);
+ }
+ return nir;
+ }
+ }
+
+ if (unlikely(dump_stats)) {
+ cache->nir_stats.miss++;
+ cache_dump_stats(cache);
+ }
+
+ return NULL;
+}
static void
pipeline_cache_init(struct v3dv_pipeline_cache *cache,
- struct v3dv_device *device)
+ struct v3dv_device *device,
+ bool cache_enabled)
{
cache->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
cache->device = device;
pthread_mutex_init(&cache->mutex, NULL);
+
+ if (cache_enabled) {
+ cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
+ sha1_compare_func);
+ cache->nir_stats.miss = 0;
+ cache->nir_stats.hit = 0;
+ cache->nir_stats.count = 0;
+ } else {
+ cache->nir_cache = NULL;
+ }
}
static void
if (cache == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- pipeline_cache_init(cache, device);
+ pipeline_cache_init(cache, device,
+ device->instance->pipeline_cache_enabled);
if (pCreateInfo->initialDataSize > 0) {
pipeline_cache_load(cache,
pthread_mutex_destroy(&cache->mutex);
+ if (cache->nir_cache) {
+ hash_table_foreach(cache->nir_cache, entry)
+ ralloc_free(entry->data);
+
+ _mesa_hash_table_destroy(cache->nir_cache, NULL);
+ }
+
vk_free2(&device->alloc, pAllocator, cache);
}
struct v3dv_physical_device physicalDevice;
struct vk_debug_report_instance debug_report_callbacks;
+
+ bool pipeline_cache_enabled;
};
/* Tracks wait threads spawned from a single vkQueueSubmit call */
#define V3DV_META_BLIT_CACHE_KEY_SIZE (3 * sizeof(uint32_t))
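+/* Per-table hit/miss/entry counters, only updated when stat dumping is
+ * enabled in v3dv_pipeline_cache.c.
+ */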
+struct v3dv_pipeline_cache_stats {
+ uint32_t miss;
+ uint32_t hit;
+ uint32_t count;
+};
+
struct v3dv_pipeline_cache {
VK_LOADER_DATA _loader_data;
struct v3dv_device *device;
mtx_t mutex;
+
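+ /* Serialized NIR shaders, keyed by shader sha1 (struct serialized_nir). */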
+ struct hash_table *nir_cache;
+ struct v3dv_pipeline_cache_stats nir_stats;
};
struct v3dv_device {
nir_shader *nir;
+ /* Combined sha1 of module, entrypoint, stage and spec_info; used as the
+ * key for the NIR cache.
+ */
+ unsigned char shader_sha1[20];
+
/** A name for this program, so you can track it in shader-db output. */
uint32_t program_id;
/** How many variants of this program were compiled, for shader-db. */
return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}
+void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ nir_shader *nir,
+ unsigned char sha1_key[20]);
+
+nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ const nir_shader_compiler_options *nir_options,
+ unsigned char sha1_key[20]);
+
#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \
\
static inline struct __v3dv_type * \