From 12d2df15f178591d45193b6cc3e093281e2ea0aa Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 1 Jun 2021 01:09:57 -0400 Subject: [PATCH] ac/llvm: add a callback to ac_cull_triangle to generate code in inner-most block This will reduce jumps in culling code. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/llvm/ac_llvm_cull.c | 13 ++++++++++--- src/amd/llvm/ac_llvm_cull.h | 7 ++++++- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 2 +- src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 3 ++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/amd/llvm/ac_llvm_cull.c b/src/amd/llvm/ac_llvm_cull.c index 3c185d6..028e125 100644 --- a/src/amd/llvm/ac_llvm_cull.c +++ b/src/amd/llvm/ac_llvm_cull.c @@ -120,7 +120,8 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4 LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, bool cull_view_xy, bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims, - bool use_halfz_clip_space) + bool use_halfz_clip_space, ac_cull_accept_func accept_func, + void *userdata) { LLVMBuilderRef builder = ctx->builder; @@ -200,6 +201,9 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4 accepted = LLVMBuildAnd(builder, accepted, visible, ""); } + if (accept_func) + accept_func(ctx, accepted, userdata); + LLVMBuildStore(builder, accepted, accepted_var); } ac_build_endif(ctx, 10000000); @@ -222,11 +226,13 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4 * the rasterizer. Set to num_samples / 2^subpixel_bits. * subpixel_bits are defined by the quantization mode. * \param options See ac_cull_options. + * \param accept_func Callback invoked in the inner-most branch where the primitive is accepted. */ LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, - struct ac_cull_options *options) + struct ac_cull_options *options, ac_cull_accept_func accept_func, + void *userdata) { struct ac_position_w_info w; ac_analyze_position_w(ctx, pos, &w); @@ -244,6 +250,7 @@ LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4 /* View culling and small primitive elimination. */ accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z, - options->cull_small_prims, options->use_halfz_clip_space); + options->cull_small_prims, options->use_halfz_clip_space, accept_func, + userdata); return accepted; } diff --git a/src/amd/llvm/ac_llvm_cull.h b/src/amd/llvm/ac_llvm_cull.h index 2c4b7f7..5e35111 100644 --- a/src/amd/llvm/ac_llvm_cull.h +++ b/src/amd/llvm/ac_llvm_cull.h @@ -48,9 +48,14 @@ struct ac_cull_options { bool use_halfz_clip_space; }; +/* Callback invoked in the inner-most branch where the primitive is accepted. */ +typedef void (*ac_cull_accept_func)(struct ac_llvm_context *ctx, LLVMValueRef accepted, + void *userdata); + LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, - struct ac_cull_options *options); + struct ac_cull_options *options, ac_cull_accept_func accept_func, + void *userdata); #endif diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index d72c72e..0bcd8b4 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -985,7 +985,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out /* Tell ES threads whether their vertex survived. */ ac_build_ifcc(&ctx->ac, ac_cull_triangle(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate, - small_prim_precision, &options), + small_prim_precision, &options, NULL, NULL), 16003); { LLVMBuildStore(builder, ctx->ac.i32_1, gs_accepted); diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index c830a85..a750866 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -673,7 +673,8 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx) LLVMValueRef accepted = ac_cull_triangle(&ctx->ac, pos, prim_restart_accepted, vp_scale, vp_translate, - ac_get_arg(&ctx->ac, param_smallprim_precision), &options); + ac_get_arg(&ctx->ac, param_smallprim_precision), &options, + NULL, NULL); ac_build_optimization_barrier(&ctx->ac, &accepted, false); LLVMValueRef accepted_threadmask = ac_get_i1_sgpr_mask(&ctx->ac, accepted); -- 2.7.4