From 943d131e7db4147113ac204e99382a18ce3fc99d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 3 Jan 2020 17:07:38 -0500 Subject: [PATCH] radeonsi/gfx10: merge main and pos/param export IF blocks into one if possible Acked-by: Pierre-Eric Pelloux-Prayer --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index b8c3463..a25c89b 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -629,7 +629,14 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, } } - ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); + bool unterminated_es_if_block = + gfx10_is_ngg_passthrough(ctx->shader) && + !ctx->screen->use_ngg_streamout && /* no query buffer */ + (ctx->type != PIPE_SHADER_VERTEX || + !ctx->shader->key.mono.u.vs_export_prim_id); + + if (!unterminated_es_if_block) + ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); LLVMValueRef is_gs_thread = si_is_gs_thread(ctx); LLVMValueRef is_es_thread = si_is_es_thread(ctx); @@ -647,8 +654,9 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, LLVMValueRef emitted_prims = NULL; if (sel->so.num_outputs) { - struct ngg_streamout nggso = {}; + assert(!unterminated_es_if_block); + struct ngg_streamout nggso = {}; nggso.num_vertices = num_vertices_val; nggso.prim_enable[0] = is_gs_thread; @@ -662,6 +670,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, LLVMValueRef user_edgeflags[3] = {}; if (sel->info.writes_edgeflag) { + assert(!unterminated_es_if_block); + /* Streamout already inserted the barrier, so don't insert it again. */ if (!sel->so.num_outputs) ac_build_s_barrier(&ctx->ac); @@ -686,6 +696,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, */ if (ctx->type == PIPE_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) { + assert(!unterminated_es_if_block); + /* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */ if (sel->so.num_outputs || sel->info.writes_edgeflag) ac_build_s_barrier(&ctx->ac); @@ -709,6 +721,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, /* Update query buffer */ if (ctx->screen->use_ngg_streamout && !info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) { + assert(!unterminated_es_if_block); + tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1); tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5029); /* if (STREAMOUT_QUERY_ENABLED) */ @@ -742,11 +756,14 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, } /* Build the primitive export. */ - if (!gfx10_ngg_export_prim_early(ctx->shader)) + if (!gfx10_ngg_export_prim_early(ctx->shader)) { + assert(!unterminated_es_if_block); gfx10_ngg_build_export_prim(ctx, user_edgeflags); + } /* Export per-vertex data (positions and parameters). */ - ac_build_ifcc(&ctx->ac, is_es_thread, 6002); + if (!unterminated_es_if_block) + ac_build_ifcc(&ctx->ac, is_es_thread, 6002); { unsigned i; -- 2.7.4