From 4cee8ce7a589a77e13a269c6cff68b749555111e Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Tue, 16 May 2023 16:10:31 +0300
Subject: [PATCH] anv: generalize internal kernel concept

We'll add more of those kernels for other purposes.

Signed-off-by: Lionel Landwerlin
Reviewed-by: Ivan Briano
Part-of:
---
 src/intel/vulkan/anv_device.c                      |   4 +-
 src/intel/vulkan/anv_generated_indirect_draws.c    | 340 -------------------
 src/intel/vulkan/anv_internal_kernels.c            | 360 +++++++++++++++++++++
 ...ted_indirect_draws.h => anv_internal_kernels.h} |   0
 src/intel/vulkan/anv_private.h                     |  29 +-
 .../vulkan/genX_cmd_draw_generated_indirect.h      |   6 +-
 src/intel/vulkan/meson.build                       |   3 +-
 7 files changed, 390 insertions(+), 352 deletions(-)
 delete mode 100644 src/intel/vulkan/anv_generated_indirect_draws.c
 create mode 100644 src/intel/vulkan/anv_internal_kernels.c
 rename src/intel/vulkan/{anv_generated_indirect_draws.h => anv_internal_kernels.h} (100%)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a0cb46b..f1166da 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -3423,7 +3423,7 @@ VkResult anv_CreateDevice(
 
    anv_device_init_border_colors(device);
 
-   anv_device_init_generated_indirect_draws(device);
+   anv_device_init_internal_kernels(device);
 
    anv_device_perf_init(device);
 
@@ -3518,7 +3518,7 @@ void anv_DestroyDevice(
 
    anv_device_finish_rt_shaders(device);
 
-   anv_device_finish_generated_indirect_draws(device);
+   anv_device_finish_internal_kernels(device);
 
    vk_pipeline_cache_destroy(device->internal_cache, NULL);
    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
diff --git a/src/intel/vulkan/anv_generated_indirect_draws.c b/src/intel/vulkan/anv_generated_indirect_draws.c
deleted file mode 100644
index 820e7c4..0000000
--- a/src/intel/vulkan/anv_generated_indirect_draws.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * Copyright © 2022 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "anv_private.h"
-
-#include "vk_nir.h"
-
-#include "compiler/brw_compiler.h"
-#include "compiler/brw_nir.h"
-#include "compiler/spirv/nir_spirv.h"
-#include "dev/intel_debug.h"
-#include "util/macros.h"
-
-#include "anv_generated_indirect_draws.h"
-
-#include "shaders/gfx9_generated_draws_spv.h"
-#include "shaders/gfx11_generated_draws_spv.h"
-
-/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
- * 64bit addresses. Binding 2 is left UBO that would normally be accessed
- * through the binding table but it fully promoted to push constants.
- *
- * As a result we're not using the binding table at all which is nice because
- * of the side command buffer we use for the generating shader does not
- * interact with the binding table allocation.
- */
-static bool
-lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-
-   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
-      return false;
-
-   nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
-   assert(res_index_instr->type == nir_instr_type_intrinsic);
-   nir_intrinsic_instr *res_index_intrin =
-      nir_instr_as_intrinsic(res_index_instr);
-   assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
-
-   b->cursor = nir_after_instr(instr);
-
-   nir_ssa_def *desc_value = NULL;
-   switch (nir_intrinsic_binding(res_index_intrin)) {
-   case 0: {
-      desc_value =
-         nir_load_ubo(b, 1, 64,
-                      nir_imm_int(b, 2),
-                      nir_imm_int(b,
-                                  offsetof(struct anv_generated_indirect_params,
-                                           indirect_data_addr)),
-                      .align_mul = 8,
-                      .align_offset = 0,
-                      .range_base = 0,
-                      .range = ~0);
-      desc_value =
-         nir_vec4(b,
-                  nir_unpack_64_2x32_split_x(b, desc_value),
-                  nir_unpack_64_2x32_split_y(b, desc_value),
-                  nir_imm_int(b, 0),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   case 1: {
-      desc_value =
-         nir_load_ubo(b, 1, 64,
-                      nir_imm_int(b, 2),
-                      nir_imm_int(b,
-                                  offsetof(struct anv_generated_indirect_params,
-                                           generated_cmds_addr)),
-                      .align_mul = 8,
-                      .align_offset = 0,
-                      .range_base = 0,
-                      .range = ~0);
-      desc_value =
-         nir_vec4(b,
-                  nir_unpack_64_2x32_split_x(b, desc_value),
-                  nir_unpack_64_2x32_split_y(b, desc_value),
-                  nir_imm_int(b, 0),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   case 2: {
-      desc_value =
-         nir_load_ubo(b, 1, 64,
-                      nir_imm_int(b, 2),
-                      nir_imm_int(b,
-                                  offsetof(struct anv_generated_indirect_params,
-                                           draw_ids_addr)),
-                      .align_mul = 8,
-                      .align_offset = 0,
-                      .range_base = 0,
-                      .range = ~0);
-      desc_value =
-         nir_vec4(b,
-                  nir_unpack_64_2x32_split_x(b, desc_value),
-                  nir_unpack_64_2x32_split_y(b, desc_value),
-                  nir_imm_int(b, 0),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   case 3:
-      desc_value =
-         nir_vec2(b,
-                  nir_imm_int(b, 2),
-                  nir_imm_int(b, 0));
-      break;
-   }
-
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value);
-
-   return true;
-}
-
-static bool
-lower_vulkan_descriptors(nir_shader *shader)
-{
-   return nir_shader_instructions_pass(shader,
-                                       lower_vulkan_descriptors_instr,
-                                       nir_metadata_block_index |
-                                       nir_metadata_dominance,
-                                       NULL);
-}
-
-static struct anv_shader_bin *
-compile_upload_spirv(struct anv_device *device,
-                     const void *key,
-                     uint32_t key_size,
-                     const uint32_t *spirv_source,
-                     uint32_t spirv_source_size,
-                     uint32_t sends_count_expectation)
-{
-   struct spirv_to_nir_options spirv_options = {
-      .caps = {
-         .int64 = true,
-      },
-      .ubo_addr_format = nir_address_format_32bit_index_offset,
-      .ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
-      .environment = NIR_SPIRV_VULKAN,
-      .create_library = false,
-   };
-   const nir_shader_compiler_options *nir_options =
-      device->physical->compiler->nir_options[MESA_SHADER_FRAGMENT];
-
-   nir_shader* nir =
-      vk_spirv_to_nir(&device->vk, spirv_source, spirv_source_size * 4,
-                      MESA_SHADER_FRAGMENT, "main", 0, NULL, &spirv_options,
-                      nir_options, NULL);
-
-   assert(nir != NULL);
-
-   nir->info.internal = true;
-
-   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
-   NIR_PASS_V(nir, nir_opt_cse);
-   NIR_PASS_V(nir, nir_opt_gcm, true);
-   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
-
-   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
-
-   NIR_PASS_V(nir, nir_split_var_copies);
-   NIR_PASS_V(nir, nir_split_per_member_structs);
-
-   struct brw_compiler *compiler = device->physical->compiler;
-   struct brw_nir_compiler_opts opts = {};
-   brw_preprocess_nir(compiler, nir, &opts);
-
-   NIR_PASS_V(nir, nir_propagate_invariant, false);
-
-   NIR_PASS_V(nir, nir_lower_input_attachments,
-              &(nir_input_attachment_options) {
-                 .use_fragcoord_sysval = true,
-                 .use_layer_id_sysval = true,
-              });
-
-   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-
-   /* Do vectorizing here. For some reason when trying to do it in the back
-    * this just isn't working.
-    */
-   nir_load_store_vectorize_options options = {
-      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
-      .callback = brw_nir_should_vectorize_mem,
-      .robust_modes = (nir_variable_mode)0,
-   };
-   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
-
-   NIR_PASS_V(nir, lower_vulkan_descriptors);
-
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
-              nir_address_format_32bit_index_offset);
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
-              nir_address_format_64bit_global_32bit_offset);
-
-   NIR_PASS_V(nir, nir_copy_prop);
-   NIR_PASS_V(nir, nir_opt_constant_folding);
-   NIR_PASS_V(nir, nir_opt_dce);
-
-   struct brw_wm_prog_key wm_key;
-   memset(&wm_key, 0, sizeof(wm_key));
-
-   struct brw_wm_prog_data wm_prog_data = {
-      .base.nr_params = nir->num_uniforms / 4,
-   };
-
-   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, wm_prog_data.base.ubo_ranges);
-
-   struct brw_compile_stats stats[3];
-   struct brw_compile_fs_params params = {
-      .nir = nir,
-      .key = &wm_key,
-      .prog_data = &wm_prog_data,
-      .stats = stats,
-      .log_data = device,
-      .debug_flag = DEBUG_WM,
-   };
-   const unsigned *program = brw_compile_fs(compiler, nir, &params);
-
-   unsigned stat_idx = 0;
-   if (wm_prog_data.dispatch_8) {
-      assert(stats[stat_idx].spills == 0);
-      assert(stats[stat_idx].fills == 0);
-      assert(stats[stat_idx].sends == sends_count_expectation);
-      stat_idx++;
-   }
-   if (wm_prog_data.dispatch_16) {
-      assert(stats[stat_idx].spills == 0);
-      assert(stats[stat_idx].fills == 0);
-      assert(stats[stat_idx].sends == sends_count_expectation);
-      stat_idx++;
-   }
-   if (wm_prog_data.dispatch_32) {
-      assert(stats[stat_idx].spills == 0);
-      assert(stats[stat_idx].fills == 0);
-      assert(stats[stat_idx].sends == sends_count_expectation * 2);
-      stat_idx++;
-   }
-
-   struct anv_pipeline_bind_map bind_map;
-   memset(&bind_map, 0, sizeof(bind_map));
-
-   struct anv_push_descriptor_info push_desc_info = {};
-
-   struct anv_shader_bin *kernel =
-      anv_device_upload_kernel(device,
-                               device->internal_cache,
-                               nir->info.stage,
-                               key, key_size, program,
-                               wm_prog_data.base.program_size,
-                               &wm_prog_data.base, sizeof(wm_prog_data),
-                               NULL, 0, NULL, &bind_map,
-                               &push_desc_info,
-                               0 /* dynamic_push_values */);
-
-   ralloc_free(nir);
-
-   return kernel;
-}
-
-VkResult
-anv_device_init_generated_indirect_draws(struct anv_device *device)
-{
-   const struct intel_l3_weights w =
-      intel_get_default_l3_weights(device->info,
-                                   true /* wants_dc_cache */,
-                                   false /* needs_slm */);
-   device->generated_draw_l3_config = intel_get_l3_config(device->info, w);
-
-   struct {
-      char name[40];
-   } indirect_draws_key = {
-      .name = "anv-generated-indirect-draws",
-   };
-
-   device->generated_draw_kernel =
-      anv_device_search_for_kernel(device,
-                                   device->internal_cache,
-                                   &indirect_draws_key,
-                                   sizeof(indirect_draws_key),
-                                   NULL);
-   if (device->generated_draw_kernel == NULL) {
-      const uint32_t *spirv_source =
-         device->info->ver >= 11 ?
-         gfx11_generated_draws_spv_source :
-         gfx9_generated_draws_spv_source;
-      const uint32_t spirv_source_size =
-         device->info->ver >= 11 ?
-         ARRAY_SIZE(gfx11_generated_draws_spv_source) :
-         ARRAY_SIZE(gfx9_generated_draws_spv_source);
-      const uint32_t send_count =
-         device->info->ver >= 11 ?
-         11 /* 2 * (2 loads + 3 stores) + 1 store */ :
-         17 /* 2 * (2 loads + 6 stores) + 1 store */;
-
-      device->generated_draw_kernel =
-         compile_upload_spirv(device,
-                              &indirect_draws_key,
-                              sizeof(indirect_draws_key),
-                              spirv_source, spirv_source_size, send_count);
-   }
-   if (device->generated_draw_kernel == NULL)
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   /* The cache already has a reference and it's not going anywhere so there
-    * is no need to hold a second reference.
-    */
-   anv_shader_bin_unref(device, device->generated_draw_kernel);
-
-   return VK_SUCCESS;
-}
-
-void
-anv_device_finish_generated_indirect_draws(struct anv_device *device)
-{
-}
diff --git a/src/intel/vulkan/anv_internal_kernels.c b/src/intel/vulkan/anv_internal_kernels.c
new file mode 100644
index 0000000..2d58e0b
--- /dev/null
+++ b/src/intel/vulkan/anv_internal_kernels.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+#include "compiler/brw_compiler.h"
+#include "compiler/brw_nir.h"
+#include "compiler/spirv/nir_spirv.h"
+#include "dev/intel_debug.h"
+#include "util/macros.h"
+
+#include "vk_nir.h"
+
+#include "anv_internal_kernels.h"
+
+#include "shaders/gfx9_generated_draws_spv.h"
+#include "shaders/gfx11_generated_draws_spv.h"
+
+static bool
+lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return false;
+
+   nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
+   assert(res_index_instr->type == nir_instr_type_intrinsic);
+   nir_intrinsic_instr *res_index_intrin =
+      nir_instr_as_intrinsic(res_index_instr);
+   assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+   b->cursor = nir_after_instr(instr);
+
+   const struct anv_internal_kernel_bind_map *bind_map = cb_data;
+   uint32_t binding = nir_intrinsic_binding(res_index_intrin);
+   assert(binding < bind_map->num_bindings);
+
+   nir_ssa_def *desc_value = NULL;
+   if (bind_map->bindings[binding].push_constant) {
+      desc_value =
+         nir_vec2(b,
+                  nir_imm_int(b, binding),
+                  nir_imm_int(b, 0));
+   } else {
+      int push_constant_binding = -1;
+      for (uint32_t i = 0; i < bind_map->num_bindings; i++) {
+         if (bind_map->bindings[i].push_constant) {
+            push_constant_binding = i;
+            break;
+         }
+      }
+      assert(push_constant_binding != -1);
+
+      desc_value =
+         nir_load_ubo(b, 1, 64,
+                      nir_imm_int(b, push_constant_binding),
+                      nir_imm_int(b,
+                                  bind_map->bindings[binding].address_offset),
+                      .align_mul = 8,
+                      .align_offset = 0,
+                      .range_base = 0,
+                      .range = ~0);
+      desc_value =
+         nir_vec4(b,
+                  nir_unpack_64_2x32_split_x(b, desc_value),
+                  nir_unpack_64_2x32_split_y(b, desc_value),
+                  nir_imm_int(b, 0),
+                  nir_imm_int(b, 0));
+   }
+
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value);
+
+   return true;
+}
+
+static bool
+lower_vulkan_descriptors(nir_shader *shader,
+                         const struct anv_internal_kernel_bind_map *bind_map)
+{
+   return nir_shader_instructions_pass(shader,
+                                       lower_vulkan_descriptors_instr,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       (void *)bind_map);
+}
+
+static struct anv_shader_bin *
+compile_upload_spirv(struct anv_device *device,
+                     gl_shader_stage stage,
+                     const void *hash_key,
+                     uint32_t hash_key_size,
+                     const struct anv_internal_kernel_bind_map *bind_map,
+                     const uint32_t *spirv_source,
+                     uint32_t spirv_source_size,
+                     uint32_t sends_count_expectation)
+{
+   struct spirv_to_nir_options spirv_options = {
+      .caps = {
+         .int64 = true,
+      },
+      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
+      .environment = NIR_SPIRV_VULKAN,
+      .create_library = false,
+   };
+   const nir_shader_compiler_options *nir_options =
+      device->physical->compiler->nir_options[stage];
+
+   nir_shader* nir =
+      vk_spirv_to_nir(&device->vk, spirv_source, spirv_source_size * 4,
+                      stage, "main", 0, NULL, &spirv_options,
+                      nir_options, NULL);
+
+   assert(nir != NULL);
+
+   nir->info.internal = true;
+
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_opt_cse);
+   NIR_PASS_V(nir, nir_opt_gcm, true);
+   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
+
+   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
+
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_split_per_member_structs);
+
+   struct brw_compiler *compiler = device->physical->compiler;
+   struct brw_nir_compiler_opts opts = {};
+   brw_preprocess_nir(compiler, nir, &opts);
+
+   NIR_PASS_V(nir, nir_propagate_invariant, false);
+
+   if (stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS_V(nir, nir_lower_input_attachments,
+                 &(nir_input_attachment_options) {
+                    .use_fragcoord_sysval = true,
+                    .use_layer_id_sysval = true,
+                 });
+   }
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* Do vectorizing here. For some reason, when trying to do it in the
+    * backend, this just isn't working.
+    */
+   nir_load_store_vectorize_options options = {
+      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
+      .callback = brw_nir_should_vectorize_mem,
+      .robust_modes = (nir_variable_mode)0,
+   };
+   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
+
+   NIR_PASS_V(nir, lower_vulkan_descriptors, bind_map);
+
+   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
+              nir_address_format_32bit_index_offset);
+   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+              nir_address_format_64bit_global_32bit_offset);
+
+   NIR_PASS_V(nir, nir_copy_prop);
+   NIR_PASS_V(nir, nir_opt_constant_folding);
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   union brw_any_prog_key key;
+   memset(&key, 0, sizeof(key));
+
+   union brw_any_prog_data prog_data;
+   memset(&prog_data, 0, sizeof(prog_data));
+   prog_data.base.nr_params = nir->num_uniforms / 4;
+
+   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.ubo_ranges);
+
+   const unsigned *program;
+   if (stage == MESA_SHADER_FRAGMENT) {
+      struct brw_compile_stats stats[3];
+      struct brw_compile_fs_params params = {
+         .nir = nir,
+         .key = &key.wm,
+         .prog_data = &prog_data.wm,
+         .stats = stats,
+         .log_data = device,
+         .debug_flag = DEBUG_WM,
+      };
+      program = brw_compile_fs(compiler, nir, &params);
+
+      unsigned stat_idx = 0;
+      if (prog_data.wm.dispatch_8) {
+         assert(stats[stat_idx].spills == 0);
+         assert(stats[stat_idx].fills == 0);
+         assert(stats[stat_idx].sends == sends_count_expectation);
+         stat_idx++;
+      }
+      if (prog_data.wm.dispatch_16) {
+         assert(stats[stat_idx].spills == 0);
+         assert(stats[stat_idx].fills == 0);
+         assert(stats[stat_idx].sends == sends_count_expectation);
+         stat_idx++;
+      }
+      if (prog_data.wm.dispatch_32) {
+         assert(stats[stat_idx].spills == 0);
+         assert(stats[stat_idx].fills == 0);
+         assert(stats[stat_idx].sends == sends_count_expectation * 2);
+         stat_idx++;
+      }
+   } else {
+      struct brw_compile_stats stats;
+      struct brw_compile_cs_params params = {
+         .nir = nir,
+         .key = &key.cs,
+         .prog_data = &prog_data.cs,
+         .stats = &stats,
+         .log_data = device,
+         .debug_flag = DEBUG_CS,
+      };
+      program = brw_compile_cs(compiler, nir, &params);
+
+      assert(stats.spills == 0);
+      assert(stats.fills == 0);
+      assert(stats.sends == sends_count_expectation);
+   }
+
+   struct anv_pipeline_bind_map dummy_bind_map;
+   memset(&dummy_bind_map, 0, sizeof(dummy_bind_map));
+
+   struct anv_push_descriptor_info push_desc_info = {};
+
+   struct anv_shader_bin *kernel =
+      anv_device_upload_kernel(device,
+                               device->internal_cache,
+                               nir->info.stage,
+                               hash_key, hash_key_size, program,
+                               prog_data.base.program_size,
+                               &prog_data.base, sizeof(prog_data),
+                               NULL, 0, NULL, &dummy_bind_map,
+                               &push_desc_info,
+                               0 /* dynamic_push_values */);
+
+   ralloc_free(nir);
+
+   return kernel;
+}
+
+VkResult
+anv_device_init_internal_kernels(struct anv_device *device)
+{
+   const struct intel_l3_weights w =
+      intel_get_default_l3_weights(device->info,
+                                   true /* wants_dc_cache */,
+                                   false /* needs_slm */);
+   device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);
+
+   struct {
+      struct {
+         char name[40];
+      } key;
+
+      gl_shader_stage stage;
+
+      const uint32_t *spirv_data;
+      uint32_t spirv_size;
+
+      uint32_t send_count;
+
+      struct anv_internal_kernel_bind_map bind_map;
+   } internal_kernels[] = {
+      [ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
+         .key = {
+            .name = "anv-generated-indirect-draws",
+         },
+         .stage = MESA_SHADER_FRAGMENT,
+         .spirv_data = device->info->ver >= 11 ?
+            gfx11_generated_draws_spv_source :
+            gfx9_generated_draws_spv_source,
+         .spirv_size = device->info->ver >= 11 ?
+            ARRAY_SIZE(gfx11_generated_draws_spv_source) :
+            ARRAY_SIZE(gfx9_generated_draws_spv_source),
+         .send_count = device->info->ver >= 11 ?
+            11 /* 2 * (2 loads + 3 stores) + 1 store */ :
+            17 /* 2 * (2 loads + 6 stores) + 1 store */,
+         .bind_map = {
+            .num_bindings = 4,
+            .bindings = {
+               {
+                  .address_offset = offsetof(struct anv_generated_indirect_params,
+                                             indirect_data_addr),
+               },
+               {
+                  .address_offset = offsetof(struct anv_generated_indirect_params,
+                                             generated_cmds_addr),
+               },
+               {
+                  .address_offset = offsetof(struct anv_generated_indirect_params,
+                                             draw_ids_addr),
+               },
+               {
+                  .push_constant = true,
+               },
+            },
+         },
+      },
+   };
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(internal_kernels); i++) {
+      device->internal_kernels[i] =
+         anv_device_search_for_kernel(device,
+                                      device->internal_cache,
+                                      &internal_kernels[i].key,
+                                      sizeof(internal_kernels[i].key),
+                                      NULL);
+      if (device->internal_kernels[i] == NULL) {
+         device->internal_kernels[i] =
+            compile_upload_spirv(device,
+                                 internal_kernels[i].stage,
+                                 &internal_kernels[i].key,
+                                 sizeof(internal_kernels[i].key),
+                                 &internal_kernels[i].bind_map,
+                                 internal_kernels[i].spirv_data,
+                                 internal_kernels[i].spirv_size,
+                                 internal_kernels[i].send_count);
+      }
+      if (device->internal_kernels[i] == NULL)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      /* The cache already has a reference and it's not going anywhere so
+       * there is no need to hold a second reference.
+       */
+      anv_shader_bin_unref(device, device->internal_kernels[i]);
+   }
+
+   return VK_SUCCESS;
+}
+
+void
+anv_device_finish_internal_kernels(struct anv_device *device)
+{
+}
diff --git a/src/intel/vulkan/anv_generated_indirect_draws.h b/src/intel/vulkan/anv_internal_kernels.h
similarity index 100%
rename from src/intel/vulkan/anv_generated_indirect_draws.h
rename to src/intel/vulkan/anv_internal_kernels.h
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5fb3f37..001c0ae 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1113,6 +1113,25 @@ anv_device_upload_nir(struct anv_device *device,
 
 void anv_load_fp64_shader(struct anv_device *device);
 
+enum anv_internal_kernel_name {
+   ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
+
+   ANV_INTERNAL_KERNEL_COUNT,
+};
+
+struct anv_internal_kernel_bind_map {
+   uint32_t num_bindings;
+   struct {
+      /* Whether this binding is provided through push constants */
+      bool push_constant;
+
+      /* When not provided by push constants, this is the offset at which the
+       * 64bit address of the binding is located in the push constant data.
+       */
+      uint32_t address_offset;
+   } bindings[5];
+};
+
 enum anv_rt_bvh_build_method {
    ANV_BVH_BUILD_METHOD_TRIVIAL,
    ANV_BVH_BUILD_METHOD_NEW_SAH,
@@ -1240,8 +1259,8 @@ struct anv_device {
     * Generates direct draw calls out of indirect parameters. Used to
     * workaround slowness with indirect draw calls.
     */
-   struct anv_shader_bin *generated_draw_kernel;
-   const struct intel_l3_config *generated_draw_l3_config;
+   struct anv_shader_bin *internal_kernels[ANV_INTERNAL_KERNEL_COUNT];
+   const struct intel_l3_config *internal_kernels_l3_config;
 
    pthread_mutex_t mutex;
    pthread_cond_t queue_submit;
@@ -4598,10 +4617,8 @@ struct anv_memcpy_state {
    struct anv_vb_cache_range vb_dirty;
 };
 
-VkResult
-anv_device_init_generated_indirect_draws(struct anv_device *device);
-void
-anv_device_finish_generated_indirect_draws(struct anv_device *device);
+VkResult anv_device_init_internal_kernels(struct anv_device *device);
+void anv_device_finish_internal_kernels(struct anv_device *device);
 
 /* This structure is used in 2 scenarios :
  *
diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
index 93dfcef..7afb524 100644
--- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
+++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
@@ -32,7 +32,7 @@
 #include "common/intel_genX_state.h"
 
 #include "anv_private.h"
-#include "anv_generated_indirect_draws.h"
+#include "anv_internal_kernels.h"
 #include "genX_simple_shader.h"
 
 /* This is a maximum number of items a fragment shader can generate due to the
@@ -149,8 +149,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
    *state = (struct anv_simple_shader) {
       .cmd_buffer = cmd_buffer,
       .batch = &cmd_buffer->generation_batch,
-      .kernel = device->generated_draw_kernel,
-      .l3_config = device->generated_draw_l3_config,
+      .kernel = device->internal_kernels[ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
+      .l3_config = device->internal_kernels_l3_config,
    };
 
    genX(emit_simple_shader_init)(state);
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 0c34e48..891cabf 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -156,9 +156,10 @@ libanv_files = files(
   'anv_descriptor_set.c',
   'anv_device.c',
   'anv_formats.c',
-  'anv_generated_indirect_draws.c',
   'anv_genX.h',
   'anv_image.c',
+  'anv_internal_kernels.c',
+  'anv_internal_kernels.h',
   'anv_kmd_backend.c',
   'anv_kmd_backend.h',
   'anv_measure.c',
-- 
2.7.4
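
To make the generalization concrete, here is a sketch of what registering a second internal kernel could look like under this scheme. It is illustrative only and not part of the patch: ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE, gfx9_memcpy_compute_spv_source, struct anv_memcpy_params and its src_addr/dst_addr fields are hypothetical names, and the send_count expectation would have to be measured from the actual compiled shader.

   /* Hypothetical second kernel: add an enum value before the count... */
   enum anv_internal_kernel_name {
      ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
      ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,

      ANV_INTERNAL_KERNEL_COUNT,
   };

   /* ...and a matching entry in the internal_kernels[] table of
    * anv_device_init_internal_kernels():
    */
   [ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = {
      .key = {
         .name = "anv-memcpy-compute",
      },
      .stage = MESA_SHADER_COMPUTE, /* takes the brw_compile_cs() path */
      .spirv_data = gfx9_memcpy_compute_spv_source,
      .spirv_size = ARRAY_SIZE(gfx9_memcpy_compute_spv_source),
      .send_count = 4, /* hypothetical, verified against the compile stats */
      .bind_map = {
         .num_bindings = 3,
         .bindings = {
            { /* binding 0: 64bit source address, read from the push data */
               .address_offset = offsetof(struct anv_memcpy_params, src_addr),
            },
            { /* binding 1: 64bit destination address */
               .address_offset = offsetof(struct anv_memcpy_params, dst_addr),
            },
            { /* binding 2: the parameter UBO, promoted to push constants */
               .push_constant = true,
            },
         },
      },
   },

No other change should be needed: compile_upload_spirv() already selects brw_compile_fs() or brw_compile_cs() from the stage, the descriptor lowering is driven entirely by the bind map, and bindings[5] leaves room for up to five bindings per kernel.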