From ba537ac25ad8c7edd38b8294d45db411a315228d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Sat, 8 Apr 2023 00:22:18 +0200 Subject: [PATCH] ac/llvm: Cover runtime 0 in GFX10 gs_alloc_req workaround. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Previously, the workaround only covered compile-time zero, but this is insufficient and can cause GPU hangs in RadeonSI when NGG culling is enabled. Fix this by handling runtime zero in the workaround. Cc: mesa-stable Signed-off-by: Timur Kristóf Reviewed-by: Qiang Yu Part-of: --- src/amd/llvm/ac_llvm_build.c | 65 ++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index b2f65a2..c1b86bc 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -3979,43 +3979,48 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt) { LLVMBuilderRef builder = ctx->builder; - LLVMValueRef tmp; - bool export_dummy_prim = false; - /* HW workaround for a GPU hang with 100% culling. + if (wave_id) + ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020); + + /* HW workaround for a GPU hang with 100% culling on GFX10. * We always have to export at least 1 primitive. * Export a degenerate triangle using vertex 0 for all 3 vertices. + * + * NOTE: We rely on the caller to set the vertex count also to 0 when the primitive count is 0. */ - if (prim_cnt == ctx->i32_0 && ctx->gfx_level == GFX10) { - assert(vtx_cnt == ctx->i32_0); - prim_cnt = ctx->i32_1; - vtx_cnt = ctx->i32_1; - export_dummy_prim = true; + if (ctx->gfx_level == GFX10) { + ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), ctx->i32_0, ""), 5021); + LLVMValueRef prim_cnt_is_0 = LLVMBuildICmp(builder, LLVMIntEQ, prim_cnt, ctx->i32_0, ""); + ac_build_ifcc(ctx, prim_cnt_is_0, 5022); + { + LLVMValueRef x = LLVMBuildShl(builder, ctx->i32_1, LLVMConstInt(ctx->i32, 12, false), ""); + x = LLVMBuildOr(builder, x, ctx->i32_1, ""); + ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, x); + + /* The vertex indices are 0, 0, 0. */ + struct ac_ngg_prim prim = {0}; + prim.passthrough = ctx->i32_0; + + /* The HW culls primitives with NaN. */ + struct ac_export_args pos = {0}; + pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = LLVMConstReal(ctx->f32, NAN); + pos.target = V_008DFC_SQ_EXP_POS; + pos.enabled_channels = 0xf; + pos.done = true; + + ac_build_export_prim(ctx, &prim); + ac_build_export(ctx, &pos); + } + ac_build_else(ctx, 5022); } - if (wave_id) - ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020); + LLVMValueRef x = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false), ""); + x = LLVMBuildOr(builder, x, vtx_cnt, ""); + ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, x); - tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false), ""); - tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); - ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp); - - if (export_dummy_prim) { - struct ac_ngg_prim prim = {0}; - /* The vertex indices are 0,0,0. */ - prim.passthrough = ctx->i32_0; - - struct ac_export_args pos = {0}; - /* The hw culls primitives with NaN. */ - pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = LLVMConstReal(ctx->f32, NAN); - pos.target = V_008DFC_SQ_EXP_POS; - pos.enabled_channels = 0xf; - pos.done = true; - - ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), ctx->i32_0, ""), - 5021); - ac_build_export_prim(ctx, &prim); - ac_build_export(ctx, &pos); + if (ctx->gfx_level == GFX10) { + ac_build_endif(ctx, 5022); ac_build_endif(ctx, 5021); } -- 2.7.4