From 00fd087f0a8c9268328058e0e0afc67096840e75 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Wed, 21 Apr 2021 14:20:45 +0200 Subject: [PATCH] aco: Allow workgroup barrier and shared scope for NGG shaders. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit NGG already needs to use workgroup barriers, but this commit allows them to come from NIR as opposed to just emitting it in ACO instruction selection. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 28 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 5eb77ab..b37c211 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6709,12 +6709,28 @@ void emit_scoped_barrier(isel_context *ctx, nir_intrinsic_instr *instr) { sync_scope mem_scope = translate_nir_scope(nir_intrinsic_memory_scope(instr)); sync_scope exec_scope = translate_nir_scope(nir_intrinsic_execution_scope(instr)); + /* We use shared storage for the following: + * - compute shaders expose it in their API + * - when tessellation is used, TCS and VS I/O is lowered to shared memory + * - when GS is used on GFX9+, VS->GS and TES->GS I/O is lowered to shared memory + * - additionally, when NGG is used on GFX10+, shared memory is used for certain features + */ + bool shared_storage_used = + ctx->stage.hw == HWStage::CS || + ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::HS || + (ctx->stage.hw == HWStage::GS && ctx->program->chip_class >= GFX9) || + ctx->stage.hw == HWStage::NGG; + + /* Workgroup barriers can hang merged shaders that can potentially have 0 threads in either half. + * They are allowed in CS, TCS, and in any NGG shader. + */ + ASSERTED bool workgroup_scope_allowed = + ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::HS || ctx->stage.hw == HWStage::NGG; + unsigned nir_storage = nir_intrinsic_memory_modes(instr); if (nir_storage & (nir_var_mem_ssbo | nir_var_mem_global)) storage |= storage_buffer | storage_image; //TODO: split this when NIR gets nir_var_mem_image - if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && (nir_storage & nir_var_mem_shared)) - storage |= storage_shared; - if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL && (nir_storage & nir_var_shader_out)) + if (shared_storage_used && (nir_storage & nir_var_mem_shared)) storage |= storage_shared; unsigned nir_semantics = nir_intrinsic_memory_semantics(instr); @@ -6724,11 +6740,7 @@ void emit_scoped_barrier(isel_context *ctx, nir_intrinsic_instr *instr) { semantics |= semantic_acquire | semantic_release; assert(!(nir_semantics & (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE))); - - /* Workgroup barriers can hang merged shaders that can potentially have 0 threads in either half. */ - assert(exec_scope != scope_workgroup || - ctx->shader->info.stage == MESA_SHADER_COMPUTE || - ctx->shader->info.stage == MESA_SHADER_TESS_CTRL); + assert(exec_scope != scope_workgroup || workgroup_scope_allowed); bld.barrier(aco_opcode::p_barrier, memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope), -- 2.7.4