From 9d841507e1d0c8443e781f4e46602cd1917e72ca Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Fri, 24 Mar 2023 11:13:45 +0100
Subject: [PATCH] aco: support v_cvt_f32_f16 with opsel in combine_mad_mix

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index bd2e413..91ca807 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3977,10 +3977,10 @@ combine_mad_mix(opt_ctx& ctx, aco_ptr& instr)
          continue;
 
       Instruction* conv = ctx.info[tmp.id()].instr;
-      if (conv->isSDWA() && (conv->sdwa().dst_sel.size() != 4 || conv->sdwa().sel[0].size() != 2 ||
-                             conv->sdwa().clamp || conv->sdwa().omod)) {
+      if (conv->valu().clamp || conv->valu().omod) {
          continue;
-      } else if (conv->isVOP3() && (conv->valu().clamp || conv->valu().omod)) {
+      } else if (conv->isSDWA() &&
+                 (conv->sdwa().dst_sel.size() != 4 || conv->sdwa().sel[0].size() != 2)) {
          continue;
       } else if (conv->isDPP()) {
          continue;
@@ -4010,9 +4010,11 @@ combine_mad_mix(opt_ctx& ctx, aco_ptr& instr)
       instr->operands[i].setTemp(conv->operands[0].getTemp());
       if (conv->definitions[0].isPrecise())
          instr->definitions[0].setPrecise(true);
-      instr->valu().opsel_hi[i] ^= true;
+      instr->valu().opsel_hi[i] = true;
       if (conv->isSDWA() && conv->sdwa().sel[0].offset() == 2)
          instr->valu().opsel_lo[i] = true;
+      else
+         instr->valu().opsel_lo[i] = conv->valu().opsel[0];
       bool neg = conv->valu().neg[0];
       bool abs = conv->valu().abs[0];
       if (!instr->valu().abs[i]) {
-- 
2.7.4