From b330c7cb2a3dd6c6e1e05a76182896cf93473818 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Tue, 31 Aug 2021 21:45:21 -0400
Subject: [PATCH] radeonsi: use a trick to extract and pack edgeflags using fewer instructions

This removes 4 instructions from the prim export packing.

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/amd/llvm/ac_llvm_build.c                    |  4 +---
 src/amd/llvm/ac_llvm_build.h                    |  2 +-
 src/amd/vulkan/radv_nir_to_llvm.c               |  9 ++-------
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 29 ++++++++++---------------
 4 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 0f07d90..cf0148c 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -4627,13 +4627,11 @@ LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ng
    LLVMBuilderRef builder = ctx->builder;
    LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, "");
    LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), "");
+   result = LLVMBuildOr(ctx->builder, result, prim->edgeflags, "");
 
    for (unsigned i = 0; i < prim->num_vertices; ++i) {
       tmp = LLVMBuildShl(builder, prim->index[i], LLVMConstInt(ctx->i32, 10 * i, false), "");
       result = LLVMBuildOr(builder, result, tmp, "");
-      tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->i32, "");
-      tmp = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 10 * i + 9, false), "");
-      result = LLVMBuildOr(builder, result, tmp, "");
    }
    return result;
 }
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 0bdab28..cdbec20 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -581,7 +581,7 @@ struct ac_ngg_prim {
    unsigned num_vertices;
    LLVMValueRef isnull;
    LLVMValueRef index[3];
-   LLVMValueRef edgeflag[3];
+   LLVMValueRef edgeflags;
    LLVMValueRef passthrough;
 };
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index dc357f2..2990e7c 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1654,13 +1654,8 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
       } else {
          prim.num_vertices = num_vertices;
          prim.isnull = ctx->ac.i1false;
+         prim.edgeflags = ac_pack_edgeflags_for_export(&ctx->ac, &ctx->args->ac);
          memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
-
-         for (unsigned i = 0; i < num_vertices; ++i) {
-            tmp = LLVMBuildLShr(builder, ac_get_arg(&ctx->ac, ctx->args->ac.gs_invocation_id),
-                                LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
-            prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
-         }
       }
 
       ac_build_export_prim(&ctx->ac, &prim);
@@ -1926,11 +1921,11 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
       tmp = ngg_gs_vertex_ptr(ctx, tid);
       flags = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
       prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
+      prim.edgeflags = ctx->ac.i32_0;
 
       for (unsigned i = 0; i < verts_per_prim; ++i) {
          prim.index[i] = LLVMBuildSub(builder, vertlive_scan.result_exclusive,
                                       LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
-         prim.edgeflag[i] = ctx->ac.i1false;
       }
 
       /* Geometry shaders output triangle strips, but NGG expects triangles. */
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index c789c28..dc00d10 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -70,17 +70,6 @@ static LLVMValueRef ngg_get_query_buf(struct si_shader_context *ctx)
                                 LLVMConstInt(ctx->ac.i32, GFX10_GS_QUERY_BUF, false));
 }
 
-static LLVMValueRef ngg_get_initial_edgeflag(struct si_shader_context *ctx, unsigned index)
-{
-   if (ctx->stage == MESA_SHADER_VERTEX) {
-      LLVMValueRef tmp;
-      tmp = LLVMBuildLShr(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
-                          LLVMConstInt(ctx->ac.i32, 8 + index, false), "");
-      return LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i1, "");
-   }
-   return ctx->ac.i1false;
-}
-
 /**
  * Return the number of vertices as a constant in \p num_vertices,
  * and return a more precise value as LLVMValueRef from the function.
@@ -190,19 +179,23 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
       ngg_get_vertices_per_prim(ctx, &prim.num_vertices);
 
       prim.isnull = ctx->ac.i1false;
+      prim.edgeflags = ac_pack_edgeflags_for_export(&ctx->ac, &ctx->args);
+
       for (unsigned i = 0; i < 3; ++i)
          prim.index[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16);
 
-      for (unsigned i = 0; i < prim.num_vertices; ++i) {
-         prim.edgeflag[i] = ngg_get_initial_edgeflag(ctx, i);
+      if (ctx->shader->selector->info.writes_edgeflag) {
+         LLVMValueRef edgeflags = ctx->ac.i32_0;
 
-         if (ctx->shader->selector->info.writes_edgeflag) {
+         for (unsigned i = 0; i < prim.num_vertices; ++i) {
             LLVMValueRef edge;
 
             edge = LLVMBuildLoad(ctx->ac.builder, user_edgeflags[i], "");
-            edge = LLVMBuildAnd(ctx->ac.builder, prim.edgeflag[i], edge, "");
-            prim.edgeflag[i] = edge;
+            edge = LLVMBuildZExt(ctx->ac.builder, edge, ctx->ac.i32, "");
+            edge = LLVMBuildShl(ctx->ac.builder, edge, LLVMConstInt(ctx->ac.i32, 9 + i*10, 0), "");
+            edgeflags = LLVMBuildOr(ctx->ac.builder, edgeflags, edge, "");
          }
+         prim.edgeflags = LLVMBuildAnd(ctx->ac.builder, prim.edgeflags, edgeflags, "");
       }
 
       ac_build_export_prim(&ctx->ac, &prim);
@@ -1159,12 +1152,12 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
       struct ac_ngg_prim prim = {};
       prim.num_vertices = 3;
       prim.isnull = ctx->ac.i1false;
+      prim.edgeflags = ac_pack_edgeflags_for_export(&ctx->ac, &ctx->args);
 
       for (unsigned vtx = 0; vtx < 3; vtx++) {
          prim.index[vtx] = LLVMBuildLoad(
             builder, si_build_gep_i8(ctx, gs_vtxptr[vtx], lds_byte1_new_thread_id), "");
          prim.index[vtx] = LLVMBuildZExt(builder, prim.index[vtx], ctx->ac.i32, "");
-         prim.edgeflag[vtx] = ngg_get_initial_edgeflag(ctx, vtx);
       }
 
       /* Set the new GS input VGPR. */
@@ -1909,11 +1902,11 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
       tmp = ngg_gs_vertex_ptr(ctx, tid);
       flags = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
       prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
+      prim.edgeflags = ctx->ac.i32_0;
 
       for (unsigned i = 0; i < verts_per_prim; ++i) {
          prim.index[i] = LLVMBuildSub(builder, vertlive_scan.result_exclusive,
                                       LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
-         prim.edgeflag[i] = ctx->ac.i1false;
       }
 
       /* Geometry shaders output triangle strips, but NGG expects triangles. */
-- 
2.7.4