From b7287a82d33b6cd1760ac7e399ba92ae52057b5d Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Sat, 17 Jun 2023 17:51:49 +0100
Subject: [PATCH] [SVE][AArch64TTI] Fix invalid mla combine that miscomputes
 the value of inactive lanes.

Consider: add(pg, a, mul_u(pg, b, c))

Although the multiply's inactive lanes are undefined, they don't
contribute to the final result. The overall result of the inactive
lanes comes from "a" and thus the above is another form of mla
rather than mla_u.
---
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 16 ++++++++--------
 .../InstCombine/AArch64/sve-intrinsic-muladdsub.ll     |  7 ++-----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7b587da..8a00325 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1305,11 +1305,11 @@ instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
                                             Intrinsic::aarch64_sve_fmad>(IC, II,
                                                                          false))
     return FMAD;
-  if (auto FMLA_U =
+  if (auto FMLA =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
-                                            Intrinsic::aarch64_sve_fmla_u>(
-              IC, II, true))
-    return FMLA_U;
+                                            Intrinsic::aarch64_sve_fmla>(IC, II,
+                                                                         true))
+    return FMLA;
   return instCombineSVEVectorBinOp(IC, II);
 }
 
@@ -1345,11 +1345,11 @@ instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
                                             Intrinsic::aarch64_sve_fnmsb>(
               IC, II, false))
     return FMSB;
-  if (auto FMLS_U =
+  if (auto FMLS =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
-                                            Intrinsic::aarch64_sve_fmls_u>(
-              IC, II, true))
-    return FMLS_U;
+                                            Intrinsic::aarch64_sve_fmls>(IC, II,
+                                                                         true))
+    return FMLS;
   return instCombineSVEVectorBinOp(IC, II);
 }
 
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll
index a1c6a37..f9239dd 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll
@@ -14,11 +14,10 @@ define <vscale x 8 x half> @combine_fmuladd_1(<vscale x 8 x i1> %p, <vscale x 8
   ret <vscale x 8 x half> %2
 }
 
-; TODO: Test highlights an invalid combine!
 ; fadd(a, fmul_u(b, c)) -> fmla(a, b, c)
 define <vscale x 8 x half> @combine_fmuladd_2(<vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
 ; CHECK-LABEL: @combine_fmuladd_2(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[P:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> [[P:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]])
 ; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 ;
   %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
@@ -109,11 +108,9 @@ define <vscale x 8 x half> @combine_fmulsub_1(<vscale x 8 x i1> %p, <vscale x 8
   ret <vscale x 8 x half> %2
 }
 
-; TODO: Test highlights an invalid combine!
-; fsub(a, fmul_u(b, c)) -> fmls(a, b, c)
 define <vscale x 8 x half> @combine_fmulsub_2(<vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
 ; CHECK-LABEL: @combine_fmulsub_2(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[P:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> [[P:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]])
 ; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 ;
   %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-- 
2.7.4
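
Note: to make the inactive-lane argument concrete, here is a minimal
self-contained LLVM IR sketch of the pattern being combined. The function
name @example and the value names are hypothetical, chosen to mirror the
combine_fmuladd_2 test above:

  ; Inactive lanes of %mul are undefined (fmul.u), but fadd (non-_u)
  ; merges its inactive lanes from its first vector operand %a, so the
  ; undefined lanes of %mul never reach the final result.
  define <vscale x 8 x half> @example(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
    %mul = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
    %res = call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %mul)
    ret <vscale x 8 x half> %res
  }

  declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
  declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

Combining this to fmla.u would leave the result's inactive lanes undefined,
yet the original fadd defines them as "a". fmla, whose inactive lanes merge
from the accumulator operand, preserves that behaviour, which is why the
combines above now emit fmla/fmls rather than fmla_u/fmls_u.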