From 8d4c0197c3d9bfa87bd3f2c124b265f03dcdfdfc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 2 May 2022 21:53:12 -0400 Subject: [PATCH] radeonsi: wait before s_barrier in TCS epilog to fix LLVM 15 Only LGKM is needed here. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader_internal.h | 1 - src/gallium/drivers/radeonsi/si_shader_llvm.c | 14 -------------- src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 11 +++++++++-- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 2595045..432f413 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -219,7 +219,6 @@ LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueR LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret, struct ac_arg param, unsigned return_index); LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx); -void si_llvm_emit_barrier(struct si_shader_context *ctx); void si_llvm_declare_esgs_ring(struct si_shader_context *ctx); LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift, unsigned bitwidth); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index f83b788..3e36a7d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -319,20 +319,6 @@ LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx) return list; } -void si_llvm_emit_barrier(struct si_shader_context *ctx) -{ - /* GFX6 only (thanks to a hw bug workaround): - * The real barrier instruction isn't needed, because an entire patch - * always fits into a single wave. 
- */ - if (ctx->screen->info.chip_class == GFX6 && ctx->stage == MESA_SHADER_TESS_CTRL) { - ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE); - return; - } - - ac_build_s_barrier(&ctx->ac); -} - /* Ensure that the esgs ring is declared. * * We declare it with 64KB alignment as a hint that the diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index c61bee8..d60710f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -683,8 +683,15 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re unsigned stride, outer_comps, inner_comps, i, offset; /* Add a barrier before loading tess factors from LDS. */ - if (!shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def) - si_llvm_emit_barrier(ctx); + if (!shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def) { + ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM); + + /* GFX6 only: s_barrier isn't needed in TCS because an entire patch always fits into + * a single wave due to a bug workaround disallowing multi-wave HS workgroups. + */ + if (ctx->screen->info.chip_class != GFX6) + ac_build_s_barrier(&ctx->ac); + } /* Do this only for invocation 0, because the tess levels are per-patch, * not per-vertex. -- 2.7.4