anv: generalize internal kernel concept
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 16 May 2023 13:10:31 +0000 (16:10 +0300)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 14 Jun 2023 06:43:57 +0000 (09:43 +0300)
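Turn the generated indirect draws kernel into one entry of a generic table of internal kernels, each described by a shader stage, a SPIR-V source and a bind map. We'll add more of those kernels for other purposes.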

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23074>

src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_generated_indirect_draws.c [deleted file]
src/intel/vulkan/anv_internal_kernels.c [new file with mode: 0644]
src/intel/vulkan/anv_internal_kernels.h [moved from src/intel/vulkan/anv_generated_indirect_draws.h with 100% similarity]
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_draw_generated_indirect.h
src/intel/vulkan/meson.build

index a0cb46b..f1166da 100644 (file)
@@ -3423,7 +3423,7 @@ VkResult anv_CreateDevice(
 
    anv_device_init_border_colors(device);
 
-   anv_device_init_generated_indirect_draws(device);
+   anv_device_init_internal_kernels(device);
 
    anv_device_perf_init(device);
 
@@ -3518,7 +3518,7 @@ void anv_DestroyDevice(
 
    anv_device_finish_rt_shaders(device);
 
-   anv_device_finish_generated_indirect_draws(device);
+   anv_device_finish_internal_kernels(device);
 
    vk_pipeline_cache_destroy(device->internal_cache, NULL);
    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
diff --git a/src/intel/vulkan/anv_generated_indirect_draws.c b/src/intel/vulkan/anv_generated_indirect_draws.c
deleted file mode 100644 (file)
index 820e7c4..0000000
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * Copyright © 2022 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "anv_private.h"
-
-#include "vk_nir.h"
-
-#include "compiler/brw_compiler.h"
-#include "compiler/brw_nir.h"
-#include "compiler/spirv/nir_spirv.h"
-#include "dev/intel_debug.h"
-#include "util/macros.h"
-
-#include "anv_generated_indirect_draws.h"
-
-#include "shaders/gfx9_generated_draws_spv.h"
-#include "shaders/gfx11_generated_draws_spv.h"
-
-/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
- * 64bit addresses. Binding 2 is left UBO that would normally be accessed
- * through the binding table but it fully promoted to push constants.
- *
- * As a result we're not using the binding table at all which is nice because
- * of the side command buffer we use for the generating shader does not
- * interact with the binding table allocation.
- */
-static bool
-lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-
-   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
-      return false;
-
-   nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
-   assert(res_index_instr->type == nir_instr_type_intrinsic);
-   nir_intrinsic_instr *res_index_intrin =
-      nir_instr_as_intrinsic(res_index_instr);
-   assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
-
-   b->cursor = nir_after_instr(instr);
-
-   nir_ssa_def *desc_value = NULL;
-   switch (nir_intrinsic_binding(res_index_intrin)) {
-   case 0: {
-      desc_value =
-         nir_load_ubo(b, 1, 64,
-                      nir_imm_int(b, 2),
-                      nir_imm_int(b,
-                                  offsetof(struct anv_generated_indirect_params,
-                                           indirect_data_addr)),
-                      .align_mul = 8,
-                      .align_offset = 0,
-                      .range_base = 0,
-                      .range = ~0);
-      desc_value =
-         nir_vec4(b,
-                  nir_unpack_64_2x32_split_x(b, desc_value),
-                  nir_unpack_64_2x32_split_y(b, desc_value),
-                  nir_imm_int(b, 0),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   case 1: {
-      desc_value =
-         nir_load_ubo(b, 1, 64,
-                      nir_imm_int(b, 2),
-                      nir_imm_int(b,
-                                  offsetof(struct anv_generated_indirect_params,
-                                           generated_cmds_addr)),
-                      .align_mul = 8,
-                      .align_offset = 0,
-                      .range_base = 0,
-                      .range = ~0);
-      desc_value =
-         nir_vec4(b,
-                  nir_unpack_64_2x32_split_x(b, desc_value),
-                  nir_unpack_64_2x32_split_y(b, desc_value),
-                  nir_imm_int(b, 0),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   case 2: {
-      desc_value =
-         nir_load_ubo(b, 1, 64,
-                      nir_imm_int(b, 2),
-                      nir_imm_int(b,
-                                  offsetof(struct anv_generated_indirect_params,
-                                           draw_ids_addr)),
-                      .align_mul = 8,
-                      .align_offset = 0,
-                      .range_base = 0,
-                      .range = ~0);
-      desc_value =
-         nir_vec4(b,
-                  nir_unpack_64_2x32_split_x(b, desc_value),
-                  nir_unpack_64_2x32_split_y(b, desc_value),
-                  nir_imm_int(b, 0),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   case 3:
-      desc_value =
-         nir_vec2(b,
-                  nir_imm_int(b, 2),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value);
-
-   return true;
-}
-
-static bool
-lower_vulkan_descriptors(nir_shader *shader)
-{
-   return nir_shader_instructions_pass(shader,
-                                       lower_vulkan_descriptors_instr,
-                                       nir_metadata_block_index |
-                                       nir_metadata_dominance,
-                                       NULL);
-}
-
-static struct anv_shader_bin *
-compile_upload_spirv(struct anv_device *device,
-                     const void *key,
-                     uint32_t key_size,
-                     const uint32_t *spirv_source,
-                     uint32_t spirv_source_size,
-                     uint32_t sends_count_expectation)
-{
-   struct spirv_to_nir_options spirv_options = {
-      .caps = {
-         .int64 = true,
-      },
-      .ubo_addr_format = nir_address_format_32bit_index_offset,
-      .ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
-      .environment = NIR_SPIRV_VULKAN,
-      .create_library = false,
-   };
-   const nir_shader_compiler_options *nir_options =
-      device->physical->compiler->nir_options[MESA_SHADER_FRAGMENT];
-
-   nir_shader* nir =
-      vk_spirv_to_nir(&device->vk, spirv_source, spirv_source_size * 4,
-                      MESA_SHADER_FRAGMENT, "main", 0, NULL, &spirv_options,
-                      nir_options, NULL);
-
-   assert(nir != NULL);
-
-   nir->info.internal = true;
-
-   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
-   NIR_PASS_V(nir, nir_opt_cse);
-   NIR_PASS_V(nir, nir_opt_gcm, true);
-   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
-
-   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
-
-   NIR_PASS_V(nir, nir_split_var_copies);
-   NIR_PASS_V(nir, nir_split_per_member_structs);
-
-   struct brw_compiler *compiler = device->physical->compiler;
-   struct brw_nir_compiler_opts opts = {};
-   brw_preprocess_nir(compiler, nir, &opts);
-
-   NIR_PASS_V(nir, nir_propagate_invariant, false);
-
-   NIR_PASS_V(nir, nir_lower_input_attachments,
-            &(nir_input_attachment_options) {
-               .use_fragcoord_sysval = true,
-               .use_layer_id_sysval = true,
-            });
-
-   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-
-   /* Do vectorizing here. For some reason when trying to do it in the back
-    * this just isn't working.
-    */
-   nir_load_store_vectorize_options options = {
-      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
-      .callback = brw_nir_should_vectorize_mem,
-      .robust_modes = (nir_variable_mode)0,
-   };
-   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
-
-   NIR_PASS_V(nir, lower_vulkan_descriptors);
-
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
-              nir_address_format_32bit_index_offset);
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
-              nir_address_format_64bit_global_32bit_offset);
-
-   NIR_PASS_V(nir, nir_copy_prop);
-   NIR_PASS_V(nir, nir_opt_constant_folding);
-   NIR_PASS_V(nir, nir_opt_dce);
-
-   struct brw_wm_prog_key wm_key;
-   memset(&wm_key, 0, sizeof(wm_key));
-
-   struct brw_wm_prog_data wm_prog_data = {
-      .base.nr_params = nir->num_uniforms / 4,
-   };
-
-   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, wm_prog_data.base.ubo_ranges);
-
-   struct brw_compile_stats stats[3];
-   struct brw_compile_fs_params params = {
-      .nir = nir,
-      .key = &wm_key,
-      .prog_data = &wm_prog_data,
-      .stats = stats,
-      .log_data = device,
-      .debug_flag = DEBUG_WM,
-   };
-   const unsigned *program = brw_compile_fs(compiler, nir, &params);
-
-   unsigned stat_idx = 0;
-   if (wm_prog_data.dispatch_8) {
-      assert(stats[stat_idx].spills == 0);
-      assert(stats[stat_idx].fills == 0);
-      assert(stats[stat_idx].sends == sends_count_expectation);
-      stat_idx++;
-   }
-   if (wm_prog_data.dispatch_16) {
-      assert(stats[stat_idx].spills == 0);
-      assert(stats[stat_idx].fills == 0);
-      assert(stats[stat_idx].sends == sends_count_expectation);
-      stat_idx++;
-   }
-   if (wm_prog_data.dispatch_32) {
-      assert(stats[stat_idx].spills == 0);
-      assert(stats[stat_idx].fills == 0);
-      assert(stats[stat_idx].sends == sends_count_expectation * 2);
-      stat_idx++;
-   }
-
-   struct anv_pipeline_bind_map bind_map;
-   memset(&bind_map, 0, sizeof(bind_map));
-
-   struct anv_push_descriptor_info push_desc_info = {};
-
-   struct anv_shader_bin *kernel =
-      anv_device_upload_kernel(device,
-                               device->internal_cache,
-                               nir->info.stage,
-                               key, key_size, program,
-                               wm_prog_data.base.program_size,
-                               &wm_prog_data.base, sizeof(wm_prog_data),
-                               NULL, 0, NULL, &bind_map,
-                               &push_desc_info,
-                               0 /* dynamic_push_values */);
-
-   ralloc_free(nir);
-
-   return kernel;
-}
-
-VkResult
-anv_device_init_generated_indirect_draws(struct anv_device *device)
-{
-   const struct intel_l3_weights w =
-      intel_get_default_l3_weights(device->info,
-                                   true /* wants_dc_cache */,
-                                   false /* needs_slm */);
-   device->generated_draw_l3_config = intel_get_l3_config(device->info, w);
-
-   struct {
-      char name[40];
-   } indirect_draws_key = {
-      .name = "anv-generated-indirect-draws",
-   };
-
-   device->generated_draw_kernel =
-      anv_device_search_for_kernel(device,
-                                   device->internal_cache,
-                                   &indirect_draws_key,
-                                   sizeof(indirect_draws_key),
-                                   NULL);
-   if (device->generated_draw_kernel == NULL) {
-      const uint32_t *spirv_source =
-         device->info->ver >= 11 ?
-         gfx11_generated_draws_spv_source :
-         gfx9_generated_draws_spv_source;
-      const uint32_t spirv_source_size =
-         device->info->ver >= 11 ?
-         ARRAY_SIZE(gfx11_generated_draws_spv_source) :
-         ARRAY_SIZE(gfx9_generated_draws_spv_source);
-      const uint32_t send_count =
-         device->info->ver >= 11 ?
-         11 /* 2 * (2 loads + 3 stores) + 1 store */ :
-         17 /* 2 * (2 loads + 6 stores) + 1 store */;
-
-      device->generated_draw_kernel =
-         compile_upload_spirv(device,
-                              &indirect_draws_key,
-                              sizeof(indirect_draws_key),
-                              spirv_source, spirv_source_size, send_count);
-   }
-   if (device->generated_draw_kernel == NULL)
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   /* The cache already has a reference and it's not going anywhere so there
-    * is no need to hold a second reference.
-    */
-   anv_shader_bin_unref(device, device->generated_draw_kernel);
-
-   return VK_SUCCESS;
-}
-
-void
-anv_device_finish_generated_indirect_draws(struct anv_device *device)
-{
-}
diff --git a/src/intel/vulkan/anv_internal_kernels.c b/src/intel/vulkan/anv_internal_kernels.c
new file mode 100644 (file)
index 0000000..2d58e0b
--- /dev/null
@@ -0,0 +1,360 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+#include "compiler/brw_compiler.h"
+#include "compiler/brw_nir.h"
+#include "compiler/spirv/nir_spirv.h"
+#include "dev/intel_debug.h"
+#include "util/macros.h"
+
+#include "vk_nir.h"
+
+#include "anv_internal_kernels.h"
+
+#include "shaders/gfx9_generated_draws_spv.h"
+#include "shaders/gfx11_generated_draws_spv.h"
+
+static bool
+lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return false;
+
+   nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
+   assert(res_index_instr->type == nir_instr_type_intrinsic);
+   nir_intrinsic_instr *res_index_intrin =
+      nir_instr_as_intrinsic(res_index_instr);
+   assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+   b->cursor = nir_after_instr(instr);
+
+   const struct anv_internal_kernel_bind_map *bind_map = cb_data;
+   uint32_t binding = nir_intrinsic_binding(res_index_intrin);
+   assert(binding < bind_map->num_bindings);
+
+   nir_ssa_def *desc_value = NULL;
+   if (bind_map->bindings[binding].push_constant) {
+      desc_value =
+         nir_vec2(b,
+                  nir_imm_int(b, binding),
+                  nir_imm_int(b, 0));
+   } else {
+      int push_constant_binding = -1;
+      for (uint32_t i = 0; i < bind_map->num_bindings; i++) {
+         if (bind_map->bindings[i].push_constant) {
+            push_constant_binding = i;
+            break;
+         }
+      }
+      assert(push_constant_binding != -1);
+
+      desc_value =
+         nir_load_ubo(b, 1, 64,
+                      nir_imm_int(b, push_constant_binding),
+                      nir_imm_int(b,
+                                  bind_map->bindings[binding].address_offset),
+                      .align_mul = 8,
+                      .align_offset = 0,
+                      .range_base = 0,
+                      .range = ~0);
+      desc_value =
+         nir_vec4(b,
+                  nir_unpack_64_2x32_split_x(b, desc_value),
+                  nir_unpack_64_2x32_split_y(b, desc_value),
+                  nir_imm_int(b, 0),
+                  nir_imm_int(b, 0));
+   }
+
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value);
+
+   return true;
+}
+
+static bool
+lower_vulkan_descriptors(nir_shader *shader,
+                         const struct anv_internal_kernel_bind_map *bind_map)
+{
+   return nir_shader_instructions_pass(shader,
+                                       lower_vulkan_descriptors_instr,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       (void *)bind_map);
+}
+
+static struct anv_shader_bin *
+compile_upload_spirv(struct anv_device *device,
+                     gl_shader_stage stage,
+                     const void *hash_key,
+                     uint32_t hash_key_size,
+                     const struct anv_internal_kernel_bind_map *bind_map,
+                     const uint32_t *spirv_source,
+                     uint32_t spirv_source_size,
+                     uint32_t sends_count_expectation)
+{
+   struct spirv_to_nir_options spirv_options = {
+      .caps = {
+         .int64 = true,
+      },
+      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
+      .environment = NIR_SPIRV_VULKAN,
+      .create_library = false,
+   };
+   const nir_shader_compiler_options *nir_options =
+      device->physical->compiler->nir_options[stage];
+
+   nir_shader* nir =
+      vk_spirv_to_nir(&device->vk, spirv_source, spirv_source_size * 4,
+                      stage, "main", 0, NULL, &spirv_options,
+                      nir_options, NULL);
+
+   assert(nir != NULL);
+
+   nir->info.internal = true;
+
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_opt_cse);
+   NIR_PASS_V(nir, nir_opt_gcm, true);
+   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
+
+   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
+
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_split_per_member_structs);
+
+   struct brw_compiler *compiler = device->physical->compiler;
+   struct brw_nir_compiler_opts opts = {};
+   brw_preprocess_nir(compiler, nir, &opts);
+
+   NIR_PASS_V(nir, nir_propagate_invariant, false);
+
+   if (stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS_V(nir, nir_lower_input_attachments,
+                 &(nir_input_attachment_options) {
+                    .use_fragcoord_sysval = true,
+                    .use_layer_id_sysval = true,
+                 });
+   }
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* Do vectorizing here. For some reason when trying to do it in the
+    * backend this just isn't working.
+    */
+   nir_load_store_vectorize_options options = {
+      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
+      .callback = brw_nir_should_vectorize_mem,
+      .robust_modes = (nir_variable_mode)0,
+   };
+   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
+
+   NIR_PASS_V(nir, lower_vulkan_descriptors, bind_map);
+
+   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
+              nir_address_format_32bit_index_offset);
+   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+              nir_address_format_64bit_global_32bit_offset);
+
+   NIR_PASS_V(nir, nir_copy_prop);
+   NIR_PASS_V(nir, nir_opt_constant_folding);
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   union brw_any_prog_key key;
+   memset(&key, 0, sizeof(key));
+
+   union brw_any_prog_data prog_data;
+   memset(&prog_data, 0, sizeof(prog_data));
+   prog_data.base.nr_params = nir->num_uniforms / 4;
+
+   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.ubo_ranges);
+
+   const unsigned *program;
+   if (stage == MESA_SHADER_FRAGMENT) {
+      struct brw_compile_stats stats[3];
+      struct brw_compile_fs_params params = {
+         .nir = nir,
+         .key = &key.wm,
+         .prog_data = &prog_data.wm,
+         .stats = stats,
+         .log_data = device,
+         .debug_flag = DEBUG_WM,
+      };
+      program = brw_compile_fs(compiler, nir, &params);
+
+      unsigned stat_idx = 0;
+      if (prog_data.wm.dispatch_8) {
+         assert(stats[stat_idx].spills == 0);
+         assert(stats[stat_idx].fills == 0);
+         assert(stats[stat_idx].sends == sends_count_expectation);
+         stat_idx++;
+      }
+      if (prog_data.wm.dispatch_16) {
+         assert(stats[stat_idx].spills == 0);
+         assert(stats[stat_idx].fills == 0);
+         assert(stats[stat_idx].sends == sends_count_expectation);
+         stat_idx++;
+      }
+      if (prog_data.wm.dispatch_32) {
+         assert(stats[stat_idx].spills == 0);
+         assert(stats[stat_idx].fills == 0);
+         assert(stats[stat_idx].sends == sends_count_expectation * 2);
+         stat_idx++;
+      }
+   } else {
+      struct brw_compile_stats stats;
+      struct brw_compile_cs_params params = {
+         .nir = nir,
+         .key = &key.cs,
+         .prog_data = &prog_data.cs,
+         .stats = &stats,
+         .log_data = device,
+         .debug_flag = DEBUG_CS,
+      };
+      program = brw_compile_cs(compiler, nir, &params);
+
+      assert(stats.spills == 0);
+      assert(stats.fills == 0);
+      assert(stats.sends == sends_count_expectation);
+   }
+
+   struct anv_pipeline_bind_map dummy_bind_map;
+   memset(&dummy_bind_map, 0, sizeof(dummy_bind_map));
+
+   struct anv_push_descriptor_info push_desc_info = {};
+
+   struct anv_shader_bin *kernel =
+      anv_device_upload_kernel(device,
+                               device->internal_cache,
+                               nir->info.stage,
+                               hash_key, hash_key_size, program,
+                               prog_data.base.program_size,
+                               &prog_data.base, sizeof(prog_data),
+                               NULL, 0, NULL, &dummy_bind_map,
+                               &push_desc_info,
+                               0 /* dynamic_push_values */);
+
+   ralloc_free(nir);
+
+   return kernel;
+}
+
+VkResult
+anv_device_init_internal_kernels(struct anv_device *device)
+{
+   const struct intel_l3_weights w =
+      intel_get_default_l3_weights(device->info,
+                                   true /* wants_dc_cache */,
+                                   false /* needs_slm */);
+   device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);
+
+   struct {
+      struct {
+         char name[40];
+      } key;
+
+      gl_shader_stage stage;
+
+      const uint32_t *spirv_data;
+      uint32_t        spirv_size;
+
+      uint32_t        send_count;
+
+      struct anv_internal_kernel_bind_map bind_map;
+   } internal_kernels[] = {
+      [ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
+         .key        = {
+            .name    = "anv-generated-indirect-draws",
+         },
+         .stage      = MESA_SHADER_FRAGMENT,
+         .spirv_data = device->info->ver >= 11 ?
+                       gfx11_generated_draws_spv_source :
+                       gfx9_generated_draws_spv_source,
+         .spirv_size = device->info->ver >= 11 ?
+                       ARRAY_SIZE(gfx11_generated_draws_spv_source) :
+                       ARRAY_SIZE(gfx9_generated_draws_spv_source),
+         .send_count = device->info->ver >= 11 ?
+                       11 /* 2 * (2 loads + 3 stores) + 1 store */ :
+                       17 /* 2 * (2 loads + 6 stores) + 1 store */,
+         .bind_map   = {
+            .num_bindings = 4,
+            .bindings     = {
+               {
+                  .address_offset = offsetof(struct anv_generated_indirect_params,
+                                             indirect_data_addr),
+               },
+               {
+                  .address_offset = offsetof(struct anv_generated_indirect_params,
+                                             generated_cmds_addr),
+               },
+               {
+                  .address_offset = offsetof(struct anv_generated_indirect_params,
+                                             draw_ids_addr),
+               },
+               {
+                  .push_constant = true,
+               },
+            },
+         },
+      },
+   };
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(internal_kernels); i++) {
+      device->internal_kernels[i] =
+         anv_device_search_for_kernel(device,
+                                      device->internal_cache,
+                                      &internal_kernels[i].key,
+                                      sizeof(internal_kernels[i].key),
+                                      NULL);
+      if (device->internal_kernels[i] == NULL) {
+         device->internal_kernels[i] =
+            compile_upload_spirv(device,
+                                 internal_kernels[i].stage,
+                                 &internal_kernels[i].key,
+                                 sizeof(internal_kernels[i].key),
+                                 &internal_kernels[i].bind_map,
+                                 internal_kernels[i].spirv_data,
+                                 internal_kernels[i].spirv_size,
+                                 internal_kernels[i].send_count);
+      }
+      if (device->internal_kernels[i] == NULL)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      /* The cache already has a reference and it's not going anywhere so
+       * there is no need to hold a second reference.
+       */
+      anv_shader_bin_unref(device, device->internal_kernels[i]);
+   }
+
+   return VK_SUCCESS;
+}
+
+void
+anv_device_finish_internal_kernels(struct anv_device *device)
+{
+}
index 5fb3f37..001c0ae 100644 (file)
@@ -1113,6 +1113,25 @@ anv_device_upload_nir(struct anv_device *device,
 void
 anv_load_fp64_shader(struct anv_device *device);
 
+enum anv_internal_kernel_name {
+   ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
+
+   ANV_INTERNAL_KERNEL_COUNT,
+};
+
+struct anv_internal_kernel_bind_map {
+   uint32_t num_bindings;
+   struct {
+      /* Whether this binding is provided through push constants */
+      bool     push_constant;
+
+      /* When not provided by push constants, this is the offset at which the
+       * 64-bit address of the binding is located in the push constant data.
+       */
+      uint32_t address_offset;
+   } bindings[5];
+};
+
 enum anv_rt_bvh_build_method {
    ANV_BVH_BUILD_METHOD_TRIVIAL,
    ANV_BVH_BUILD_METHOD_NEW_SAH,
@@ -1240,8 +1259,8 @@ struct anv_device {
      * Generates direct draw calls out of indirect parameters. Used to
      * workaround slowness with indirect draw calls.
      */
-    struct anv_shader_bin                      *generated_draw_kernel;
-    const struct intel_l3_config               *generated_draw_l3_config;
+    struct anv_shader_bin                      *internal_kernels[ANV_INTERNAL_KERNEL_COUNT];
+    const struct intel_l3_config               *internal_kernels_l3_config;
 
     pthread_mutex_t                             mutex;
     pthread_cond_t                              queue_submit;
@@ -4598,10 +4617,8 @@ struct anv_memcpy_state {
    struct anv_vb_cache_range vb_dirty;
 };
 
-VkResult
-anv_device_init_generated_indirect_draws(struct anv_device *device);
-void
-anv_device_finish_generated_indirect_draws(struct anv_device *device);
+VkResult anv_device_init_internal_kernels(struct anv_device *device);
+void anv_device_finish_internal_kernels(struct anv_device *device);
 
 /* This structure is used in 2 scenarios :
  *
index 93dfcef..7afb524 100644 (file)
@@ -32,7 +32,7 @@
 #include "common/intel_genX_state.h"
 
 #include "anv_private.h"
-#include "anv_generated_indirect_draws.h"
+#include "anv_internal_kernels.h"
 #include "genX_simple_shader.h"
 
 /* This is a maximum number of items a fragment shader can generate due to the
@@ -149,8 +149,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
    *state = (struct anv_simple_shader) {
       .cmd_buffer = cmd_buffer,
       .batch      = &cmd_buffer->generation_batch,
-      .kernel     = device->generated_draw_kernel,
-      .l3_config  = device->generated_draw_l3_config,
+      .kernel     = device->internal_kernels[ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
+      .l3_config  = device->internal_kernels_l3_config,
    };
 
    genX(emit_simple_shader_init)(state);
index 0c34e48..891cabf 100644 (file)
@@ -156,9 +156,10 @@ libanv_files = files(
   'anv_descriptor_set.c',
   'anv_device.c',
   'anv_formats.c',
-  'anv_generated_indirect_draws.c',
   'anv_genX.h',
   'anv_image.c',
+  'anv_internal_kernels.c',
+  'anv_internal_kernels.h',
   'anv_kmd_backend.c',
   'anv_kmd_backend.h',
   'anv_measure.c',