From b1964ad4d6c9532e3dcac87d163a0131ece5d515 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Mon, 14 Sep 2020 10:04:40 +0200 Subject: [PATCH] aco: Extract lanecount_to_mask to a separate function. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 28 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 2cfbfdd..6635734 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10629,20 +10629,19 @@ void cleanup_cfg(Program *program) } } -Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i) +Temp lanecount_to_mask(isel_context *ctx, Temp count, bool allow64 = true) { - Builder bld(ctx->program, ctx->block); - - /* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */ - Temp count = i == 0 - ? get_arg(ctx, ctx->args->merged_wave_info) - : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u)); + assert(count.regClass() == s1); + Builder bld(ctx->program, ctx->block); Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u)); Temp cond; if (ctx->program->wave_size == 64) { + /* If we know that all 64 threads can't be active at a time, we just use the mask as-is */ + if (!allow64) + return mask; + /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */ Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */)); cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64)); @@ -10654,6 +10653,19 @@ Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i) return cond; } +Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i) +{ + Builder bld(ctx->program, ctx->block); + + /* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */ + Temp count = i == 0 + ? get_arg(ctx, ctx->args->merged_wave_info) + : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), + get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u)); + + return lanecount_to_mask(ctx, count); +} + bool ngg_early_prim_export(isel_context *ctx) { /* TODO: Check edge flags, and if they are written, return false. (Needed for OpenGL, not for Vulkan.) */ -- 2.7.4