From 7a35113455a703a8b5ff5544f527895af6763e01 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Wed, 16 Aug 2023 16:55:21 +0300
Subject: [PATCH] anv: add simple shader support without a command buffer

Limited to compute for now. Annoyingly Gfx9 requires a binding table block.

Signed-off-by: Lionel Landwerlin
Acked-by: Emma Anholt
Part-of:
---
 src/intel/vulkan/anv_private.h                     |  9 ++-
 .../vulkan/genX_cmd_draw_generated_indirect.h      | 12 ++--
 src/intel/vulkan/genX_query.c                      | 11 ++--
 src/intel/vulkan/genX_simple_shader.h              | 66 ++++++++++++++--------
 4 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5cd18fc..66d68e8 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -3118,13 +3118,18 @@ anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
  * State tracking for simple internal shaders
  */
 struct anv_simple_shader {
-   /* The command buffer associated with this emission */
+   /* The device associated with this emission */
+   struct anv_device *device;
+   /* The command buffer associated with this emission (can be NULL) */
    struct anv_cmd_buffer *cmd_buffer;
+   /* State stream used for various internal allocations */
+   struct anv_state_stream *dynamic_state_stream;
+   struct anv_state_stream *general_state_stream;
    /* Where to emit the commands (can be different from cmd_buffer->batch) */
    struct anv_batch *batch;
    /* Shader to use */
    struct anv_shader_bin *kernel;
-   /**/
+   /* L3 config used by the shader */
    const struct intel_l3_config *l3_config;
 
    /* Managed by the simpler shader helper*/
diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
index 6a0cdf9..0c6093a 100644
--- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
+++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
@@ -146,10 +146,14 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
    struct anv_device *device = cmd_buffer->device;
    struct anv_simple_shader *state = &cmd_buffer->generation_shader_state;
    *state = (struct anv_simple_shader) {
-      .cmd_buffer = cmd_buffer,
-      .batch      = &cmd_buffer->generation_batch,
-      .kernel     = device->internal_kernels[ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
-      .l3_config  = device->internal_kernels_l3_config,
+      .device               = device,
+      .cmd_buffer           = cmd_buffer,
+      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
+      .general_state_stream = &cmd_buffer->general_state_stream,
+      .batch                = &cmd_buffer->generation_batch,
+      .kernel               = device->internal_kernels[
+         ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
+      .l3_config            = device->internal_kernels_l3_config,
    };
 
    genX(emit_simple_shader_init)(state);
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 35612b9..f5543b8 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -1703,13 +1703,16 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
    }
 
    struct anv_simple_shader state = {
-      .cmd_buffer = cmd_buffer,
-      .batch      = &cmd_buffer->batch,
-      .kernel     = device->internal_kernels[
+      .device               = cmd_buffer->device,
+      .cmd_buffer           = cmd_buffer,
+      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
+      .general_state_stream = &cmd_buffer->general_state_stream,
+      .batch                = &cmd_buffer->batch,
+      .kernel               = device->internal_kernels[
          cmd_buffer->state.current_pipeline == GPGPU ?
          ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE :
          ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT],
-      .l3_config  = device->internal_kernels_l3_config,
+      .l3_config            = device->internal_kernels_l3_config,
    };
 
    genX(emit_simple_shader_init)(&state);
diff --git a/src/intel/vulkan/genX_simple_shader.h b/src/intel/vulkan/genX_simple_shader.h
index f256893..34d3486 100644
--- a/src/intel/vulkan/genX_simple_shader.h
+++ b/src/intel/vulkan/genX_simple_shader.h
@@ -36,10 +36,11 @@
 static void
 genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
 {
-   assert(state->cmd_buffer->state.current_pipeline == _3D);
+   assert(state->cmd_buffer == NULL ||
+          state->cmd_buffer->state.current_pipeline == _3D);
 
    struct anv_batch *batch = state->batch;
-   struct anv_device *device = state->cmd_buffer->device;
+   struct anv_device *device = state->device;
    const struct brw_wm_prog_data *prog_data =
       brw_wm_prog_data_const(state->kernel->prog_data);
 
@@ -214,8 +215,8 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
 
    anv_batch_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
       struct anv_state cc_state =
-         anv_cmd_buffer_alloc_dynamic_state(state->cmd_buffer,
-                                            4 * GENX(CC_VIEWPORT_length), 32);
+         anv_state_stream_alloc(state->dynamic_state_stream,
+                                4 * GENX(CC_VIEWPORT_length), 32);
       struct GENX(CC_VIEWPORT) cc_viewport = {
          .MinimumDepth = 0.0f,
          .MaximumDepth = 1.0f,
@@ -341,13 +342,24 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
 static void
 genX(emit_simpler_shader_init_compute)(struct anv_simple_shader *state)
 {
-   assert(state->cmd_buffer->state.current_pipeline == GPGPU);
+   assert(state->cmd_buffer == NULL ||
+          state->cmd_buffer->state.current_pipeline == GPGPU);
 
 #if GFX_VERx10 >= 125
    struct anv_shader_bin *cs_bin = state->kernel;
    const struct brw_cs_prog_data *prog_data =
       (const struct brw_cs_prog_data *) cs_bin->prog_data;
-   genX(cmd_buffer_ensure_cfe_state)(state->cmd_buffer, prog_data->base.total_scratch);
+   /* Currently our simple shaders are simple enough that they never spill. */
+   assert(prog_data->base.total_scratch == 0);
+   if (state->cmd_buffer != NULL) {
+      genX(cmd_buffer_ensure_cfe_state)(state->cmd_buffer, 0);
+   } else {
+      anv_batch_emit(state->batch, GENX(CFE_STATE), cfe) {
+         cfe.MaximumNumberofThreads =
+            state->device->info->max_cs_threads *
+            state->device->info->subslice_total;
+      }
+   }
 #endif
 }
 
@@ -367,15 +379,13 @@ static struct anv_state
 genX(simple_shader_alloc_push)(struct anv_simple_shader *state, uint32_t size)
 {
    if (state->kernel->stage == MESA_SHADER_FRAGMENT) {
-      return anv_cmd_buffer_alloc_dynamic_state(state->cmd_buffer,
-                                                size,
-                                                ANV_UBO_ALIGNMENT);
+      return anv_state_stream_alloc(state->dynamic_state_stream,
+                                    size, ANV_UBO_ALIGNMENT);
    } else {
 #if GFX_VERx10 >= 125
-      return anv_state_stream_alloc(&state->cmd_buffer->general_state_stream,
-                                    size, 64);
+      return anv_state_stream_alloc(state->general_state_stream, align(size, 64), 64);
 #else
-      return anv_cmd_buffer_alloc_dynamic_state(state->cmd_buffer, size, 64);
+      return anv_state_stream_alloc(state->dynamic_state_stream, size, 64);
 #endif
    }
 }
@@ -386,17 +396,14 @@ genX(simple_shader_push_state_address)(struct anv_simple_shader *state,
 {
    if (state->kernel->stage == MESA_SHADER_FRAGMENT) {
       return anv_state_pool_state_address(
-         &state->cmd_buffer->device->dynamic_state_pool,
-         push_state);
+         &state->device->dynamic_state_pool, push_state);
    } else {
 #if GFX_VERx10 >= 125
       return anv_state_pool_state_address(
-         &state->cmd_buffer->device->general_state_pool,
-         push_state);
+         &state->device->general_state_pool, push_state);
 #else
       return anv_state_pool_state_address(
-         &state->cmd_buffer->device->dynamic_state_pool,
-         push_state);
+         &state->device->dynamic_state_pool, push_state);
 #endif
    }
 }
@@ -406,15 +413,20 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
                                   uint32_t num_threads,
                                   struct anv_state push_state)
 {
-   struct anv_device *device = state->cmd_buffer->device;
+   struct anv_device *device = state->device;
    struct anv_batch *batch = state->batch;
    struct anv_address push_addr =
       anv_state_pool_state_address(&device->dynamic_state_pool, push_state);
 
    if (state->kernel->stage == MESA_SHADER_FRAGMENT) {
+      /* At the moment we require a command buffer associated with this
+       * emission as we need to allocate binding tables on Gfx9.
+       */
+      assert(state->cmd_buffer != NULL);
+
       struct anv_state vs_data_state =
-         anv_cmd_buffer_alloc_dynamic_state(
-            state->cmd_buffer, 9 * sizeof(uint32_t), 32);
+         anv_state_stream_alloc(state->dynamic_state_stream,
+                                9 * sizeof(uint32_t), 32);
 
       float x0 = 0.0f, x1 = MIN2(num_threads, 8192);
       float y0 = 0.0f, y1 = DIV_ROUND_UP(num_threads, 8192);
@@ -546,7 +558,12 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
       enum anv_pipe_bits emitted_bits = 0;
       genX(emit_apply_pipe_flushes)(batch, device, GPGPU, ANV_PIPE_CS_STALL_BIT,
                                     &emitted_bits);
-      anv_cmd_buffer_update_pending_query_bits(state->cmd_buffer, emitted_bits);
+
+      /* If we have a command buffer allocated with the emission, update the
+       * pending bits.
+       */
+      if (state->cmd_buffer)
+         anv_cmd_buffer_update_pending_query_bits(state->cmd_buffer, emitted_bits);
 
       anv_batch_emit(batch, GENX(MEDIA_VFE_STATE), vfe) {
          vfe.StackSize = 0;
@@ -576,9 +593,8 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
          }
       }
       struct anv_state iface_desc_state =
-         anv_cmd_buffer_alloc_dynamic_state(state->cmd_buffer,
-                                            GENX(INTERFACE_DESCRIPTOR_DATA_length) * 4,
-                                            64);
+         anv_state_stream_alloc(state->dynamic_state_stream,
+                                GENX(INTERFACE_DESCRIPTOR_DATA_length) * 4, 64);
       struct GENX(INTERFACE_DESCRIPTOR_DATA) iface_desc = {
          .KernelStartPointer = state->kernel->kernel.offset +
                                brw_cs_prog_data_prog_offset(prog_data,
-- 
2.7.4