From 7d6ec9d43bd45843eebddb046db333f0b18c9495 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 14 Jun 2018 01:10:54 -0400 Subject: [PATCH] radeonsi/gfx9: insert the barrier between merged shaders inside the if block --- src/gallium/drivers/radeonsi/si_shader.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b04ad21..332e316 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6140,16 +6140,24 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) if (!shader->is_monolithic) ac_init_exec_full_mask(&ctx->ac); - /* The barrier must execute for all shaders in a - * threadgroup. - */ - si_llvm_emit_barrier(NULL, bld_base, NULL); - LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); LLVMValueRef ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num_threads, ""); lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena); + + /* The barrier must execute for all shaders in a + * threadgroup. + * + * Execute the barrier inside the conditional block, + * so that empty waves can jump directly to s_endpgm, + * which will also signal the barrier. + * + * If the shader is TCS and the TCS epilog is present + * and contains a barrier, it will wait there and then + * reach s_endpgm. + */ + si_llvm_emit_barrier(NULL, bld_base, NULL); } } -- 2.7.4