From 9bc0fc89c8909fd5cc9c591aa98038f927a01e0e Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 19 Oct 2021 10:43:03 +0100
Subject: [PATCH] aco: disable mul(cndmask(0, 1, b), a) optimization sometimes
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This optimization doesn't work for SDWA or DPP multiplications. We also
can't do it if denormal flushing is required, because v_cndmask_b32
doesn't flush denormals, and we can't do it if we can't assume operands
are finite, because 0.0 * inf is NaN, not 0.

Signed-off-by: Rhys Perry
Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index ad70ca1..8d88d08 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3453,7 +3453,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr)
       }
    }
    /* v_mul_f32(v_cndmask_b32(0, 1.0, cond), a) -> v_cndmask_b32(0, a, cond) */
-   else if (instr->opcode == aco_opcode::v_mul_f32 && !instr->isVOP3()) {
+   else if (instr->opcode == aco_opcode::v_mul_f32 && !ctx.fp_mode.preserve_signed_zero_inf_nan32 &&
+            !instr->usesModifiers() && !ctx.fp_mode.must_flush_denorms32) {
       for (unsigned i = 0; i < 2; i++) {
          if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2f() &&
              ctx.uses[instr->operands[i].tempId()] == 1 && instr->operands[!i].isTemp() &&
-- 
2.7.4