From eda281e9772deee630275ddd7c23fbac841ecb38 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 Feb 2019 21:02:04 -0500 Subject: [PATCH] ac: add LLVM code for triangle culling MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Tested-by: Dieter Nützel Acked-by: Nicolai Hähnle --- src/amd/Makefile.sources | 2 + src/amd/common/ac_llvm_cull.c | 275 ++++++++++++++++++++++++++++++++++++++++++ src/amd/common/ac_llvm_cull.h | 59 +++++++++ src/amd/common/meson.build | 2 + 4 files changed, 338 insertions(+) create mode 100644 src/amd/common/ac_llvm_cull.c create mode 100644 src/amd/common/ac_llvm_cull.h diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources index 58e0008..e1557ff 100644 --- a/src/amd/Makefile.sources +++ b/src/amd/Makefile.sources @@ -39,6 +39,8 @@ AMD_COMPILER_FILES = \ common/ac_exp_param.h \ common/ac_llvm_build.c \ common/ac_llvm_build.h \ + common/ac_llvm_cull.c \ + common/ac_llvm_cull.h \ common/ac_llvm_helper.cpp \ common/ac_llvm_util.c \ common/ac_llvm_util.h \ diff --git a/src/amd/common/ac_llvm_cull.c b/src/amd/common/ac_llvm_cull.c new file mode 100644 index 0000000..1c2da3e --- /dev/null +++ b/src/amd/common/ac_llvm_cull.c @@ -0,0 +1,275 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include "ac_llvm_cull.h" +#include + +struct ac_position_w_info { + /* If a primitive intersects the W=0 plane, it causes a reflection + * of the determinant used for face culling. Every vertex behind + * the W=0 plane negates the determinant, so having 2 vertices behind + * the plane has no effect. This is i1 true if the determinant should be + * negated. + */ + LLVMValueRef w_reflection; + + /* If we simplify the "-w <= p <= w" view culling equation, we get + * "-w <= w", which can't be satisfied when w is negative. + * In perspective projection, a negative W means that the primitive + * is behind the viewer, but the equation is independent of the type + * of projection. + * + * w_accepted is false when all W are negative and therefore + * the primitive is invisible. + */ + LLVMValueRef w_accepted; + + LLVMValueRef all_w_positive; + LLVMValueRef any_w_negative; +}; + +static void ac_analyze_position_w(struct ac_llvm_context *ctx, + LLVMValueRef pos[3][4], + struct ac_position_w_info *w) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef all_w_negative = ctx->i1true; + + w->w_reflection = ctx->i1false; + w->any_w_negative = ctx->i1false; + + for (unsigned i = 0; i < 3; i++) { + LLVMValueRef neg_w; + + neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, ""); + /* If neg_w is true, negate w_reflection. */ + w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, ""); + w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, ""); + all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, ""); + } + w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, ""); + w->w_accepted = LLVMBuildNot(builder, all_w_negative, ""); +} + +/* Perform front/back face culling and return true if the primitive is accepted. */ +static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, + LLVMValueRef pos[3][4], + struct ac_position_w_info *w, + bool cull_front, + bool cull_back, + bool cull_zero_area) +{ + LLVMBuilderRef builder = ctx->builder; + + if (cull_front && cull_back) + return ctx->i1false; + + if (!cull_front && !cull_back && !cull_zero_area) + return ctx->i1true; + + /* Front/back face culling. Also if the determinant == 0, the triangle + * area is 0. + */ + LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); + LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); + LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); + LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); + LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, ""); + LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, ""); + LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, ""); + + /* Negative W negates the determinant. */ + det = LLVMBuildSelect(builder, w->w_reflection, + LLVMBuildFNeg(builder, det, ""), + det, ""); + + LLVMValueRef accepted = NULL; + if (cull_front) { + LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; + accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); + } else if (cull_back) { + LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; + accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); + } else if (cull_zero_area) { + accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); + } + return accepted; +} + +/* Perform view culling and small primitive elimination and return true + * if the primitive is accepted and initially_accepted == true. */ +static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, + LLVMValueRef pos[3][4], + LLVMValueRef initially_accepted, + struct ac_position_w_info *w, + LLVMValueRef vp_scale[2], + LLVMValueRef vp_translate[2], + LLVMValueRef small_prim_precision, + bool cull_view_xy, + bool cull_view_near_z, + bool cull_view_far_z, + bool cull_small_prims, + bool use_halfz_clip_space) +{ + LLVMBuilderRef builder = ctx->builder; + + if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims) + return ctx->i1true; + + /* Skip the culling if the primitive has already been rejected or + * if any W is negative. The bounding box culling doesn't work when + * W is negative. + */ + LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, + w->all_w_positive, ""); + LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, ""); + LLVMBuildStore(builder, initially_accepted, accepted_var); + + ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */); + { + LLVMValueRef bbox_min[3], bbox_max[3]; + LLVMValueRef accepted = initially_accepted; + + /* Compute the primitive bounding box for easy culling. */ + for (unsigned chan = 0; chan < 3; chan++) { + bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]); + bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); + + bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]); + bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); + } + + /* View culling. */ + if (cull_view_xy || cull_view_near_z || cull_view_far_z) { + for (unsigned chan = 0; chan < 3; chan++) { + LLVMValueRef visible; + + if ((cull_view_xy && chan <= 1) || + (cull_view_near_z && chan == 2)) { + float t = chan == 2 && use_halfz_clip_space ? 0 : -1; + visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan], + LLVMConstReal(ctx->f32, t), ""); + accepted = LLVMBuildAnd(builder, accepted, visible, ""); + } + + if ((cull_view_xy && chan <= 1) || + (cull_view_far_z && chan == 2)) { + visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], + ctx->f32_1, ""); + accepted = LLVMBuildAnd(builder, accepted, visible, ""); + } + } + } + + /* Small primitive elimination. */ + if (cull_small_prims) { + /* Assuming a sample position at (0.5, 0.5), if we round + * the bounding box min/max extents and the results of + * the rounding are equal in either the X or Y direction, + * the bounding box does not intersect the sample. + * + * See these GDC slides for pictures: + * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf + */ + LLVMValueRef min, max, not_equal[2], visible; + + for (unsigned chan = 0; chan < 2; chan++) { + /* Convert the position to screen-space coordinates. */ + min = ac_build_fmad(ctx, bbox_min[chan], + vp_scale[chan], vp_translate[chan]); + max = ac_build_fmad(ctx, bbox_max[chan], + vp_scale[chan], vp_translate[chan]); + /* Scale the bounding box according to the precision of + * the rasterizer and the number of MSAA samples. */ + min = LLVMBuildFSub(builder, min, small_prim_precision, ""); + max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); + + /* Determine if the bbox intersects the sample point. + * It also works for MSAA, but vp_scale, vp_translate, + * and small_prim_precision are computed differently. + */ + min = ac_build_round(ctx, min); + max = ac_build_round(ctx, max); + not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); + } + visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], ""); + accepted = LLVMBuildAnd(builder, accepted, visible, ""); + } + + LLVMBuildStore(builder, accepted, accepted_var); + } + ac_build_endif(ctx, 10000000); + + return LLVMBuildLoad(builder, accepted_var, ""); +} + +/** + * Return i1 true if the primitive is accepted (not culled). + * + * \param pos Vertex positions 3x vec4 + * \param initially_accepted AND'ed with the result. Some computations can be + * skipped if this is false. + * \param vp_scale Viewport scale XY. + * For MSAA, multiply them by the number of samples. + * \param vp_translate Viewport translation XY. + * For MSAA, multiply them by the number of samples. + * \param small_prim_precision Precision of small primitive culling. This should + * be the same as or greater than the precision of + * the rasterizer. Set to num_samples / 2^subpixel_bits. + * subpixel_bits are defined by the quantization mode. + * \param options See ac_cull_options. + */ +LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, + LLVMValueRef pos[3][4], + LLVMValueRef initially_accepted, + LLVMValueRef vp_scale[2], + LLVMValueRef vp_translate[2], + LLVMValueRef small_prim_precision, + struct ac_cull_options *options) +{ + struct ac_position_w_info w; + ac_analyze_position_w(ctx, pos, &w); + + /* W culling. */ + LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true; + accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, ""); + + /* Face culling. */ + accepted = LLVMBuildAnd(ctx->builder, accepted, + ac_cull_face(ctx, pos, &w, + options->cull_front, + options->cull_back, + options->cull_zero_area), ""); + + /* View culling and small primitive elimination. */ + accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, + small_prim_precision, + options->cull_view_xy, + options->cull_view_near_z, + options->cull_view_far_z, + options->cull_small_prims, + options->use_halfz_clip_space); + return accepted; +} diff --git a/src/amd/common/ac_llvm_cull.h b/src/amd/common/ac_llvm_cull.h new file mode 100644 index 0000000..0aa6c90 --- /dev/null +++ b/src/amd/common/ac_llvm_cull.h @@ -0,0 +1,59 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#ifndef AC_LLVM_CULL_H +#define AC_LLVM_CULL_H + +#include "ac_llvm_build.h" + +struct ac_cull_options { + /* In general, I recommend setting all to true except view Z culling, + * which isn't so effective because W culling is cheaper and partially + * replaces near Z culling, and you don't need to set Position.z + * if Z culling is disabled. + * + * If something doesn't work, turn some of these off to find out what. + */ + bool cull_front; + bool cull_back; + bool cull_view_xy; + bool cull_view_near_z; + bool cull_view_far_z; + bool cull_small_prims; + bool cull_zero_area; + bool cull_w; /* cull primitives with all W < 0 */ + + bool use_halfz_clip_space; +}; + +LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, + LLVMValueRef pos[3][4], + LLVMValueRef initially_accepted, + LLVMValueRef vp_scale[2], + LLVMValueRef vp_translate[2], + LLVMValueRef small_prim_precision, + struct ac_cull_options *options); + +#endif diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index 6827a02..c034339 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -32,6 +32,8 @@ amd_common_files = files( 'ac_exp_param.h', 'ac_llvm_build.c', 'ac_llvm_build.h', + 'ac_llvm_cull.c', + 'ac_llvm_cull.h', 'ac_llvm_helper.cpp', 'ac_llvm_util.c', 'ac_llvm_util.h', -- 2.7.4