From 8ab032fbe5e4e27d44dc9da9c3289f741a3334c9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 6 Dec 2022 08:05:54 -0500 Subject: [PATCH] InstCombine: Fold fneg (copysign x, y) -> copysign x, (fneg y) --- llvm/include/llvm/IR/IRBuilder.h | 8 ++++++++ .../Transforms/InstCombine/InstCombineAddSub.cpp | 21 ++++++++++++++++++- .../Transforms/InstCombine/copysign-fneg-fabs.ll | 24 +++++++++++----------- .../InstCombine/unordered-compare-and-ordered.ll | 4 ++-- 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index e57c04a..89722ca 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -944,6 +944,14 @@ public: return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name); } + /// Create call to the copysign intrinsic. + CallInst *CreateCopySign(Value *LHS, Value *RHS, + Instruction *FMFSource = nullptr, + const Twine &Name = "") { + return CreateBinaryIntrinsic(Intrinsic::copysign, LHS, RHS, FMFSource, + Name); + } + /// Create a call to the arithmetic_fence intrinsic. CallInst *CreateArithmeticFence(Value *Val, Type *DstType, const Twine &Name = "") { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c77b495..606ebe3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2436,9 +2436,13 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder)) return R; + Value *OneUse; + if (!match(Op, m_OneUse(m_Value(OneUse)))) + return nullptr; + // Try to eliminate fneg if at least 1 arm of the select is negated. 
Value *Cond; - if (match(Op, m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y))))) { + if (match(OneUse, m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))) { // Unlike most transforms, this one is not safe to propagate nsz unless // it is present on the original select. We union the flags from the select // and fneg and then remove nsz if needed. @@ -2470,6 +2474,21 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { } } + // fneg (copysign x, y) -> copysign x, (fneg y) + if (match(OneUse, m_CopySign(m_Value(X), m_Value(Y)))) { + // The source copysign has an additional value input, so we can't propagate + // flags the copysign doesn't also have. + FastMathFlags FMF = I.getFastMathFlags(); + FMF &= cast<FPMathOperator>(OneUse)->getFastMathFlags(); + + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FMF); + + Value *NegY = Builder.CreateFNeg(Y); + Value *NewCopySign = Builder.CreateCopySign(X, NegY); + return replaceInstUsesWith(I, NewCopySign); + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll index b8512e6..63596a0 100644 --- a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll +++ b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll @@ -68,9 +68,9 @@ define half @copysign_fneg_fabs_y(half %x, half %y) { define half @fneg_copysign(half %x, half %y) { ; CHECK-LABEL: @fneg_copysign( -; CHECK-NEXT: [[COPYSIGN:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[Y:%.*]]) -; CHECK-NEXT: [[FNEG_COPYSIGN:%.*]] = fneg half [[COPYSIGN]] -; CHECK-NEXT: ret half [[FNEG_COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg half [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[TMP1]]) +; CHECK-NEXT: ret half [[TMP2]] ; %copysign = call half @llvm.copysign.f16(half %x, half %y) %fneg.copysign = fneg half %copysign @@ -102,9 +102,9 @@ define half @fabs_copysign(half %x, half %y) { define <2 x half> 
@fneg_copysign_vector(<2 x half> %x, <2 x half> %y) { ; CHECK-LABEL: @fneg_copysign_vector( -; CHECK-NEXT: [[COPYSIGN:%.*]] = call <2 x half> @llvm.copysign.v2f16(<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) -; CHECK-NEXT: [[FNEG_COPYSIGN:%.*]] = fneg <2 x half> [[COPYSIGN]] -; CHECK-NEXT: ret <2 x half> [[FNEG_COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x half> [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.copysign.v2f16(<2 x half> [[X:%.*]], <2 x half> [[TMP1]]) +; CHECK-NEXT: ret <2 x half> [[TMP2]] ; %copysign = call <2 x half> @llvm.copysign.v2f16(<2 x half> %x, <2 x half> %y) %fneg.copysign = fneg <2 x half> %copysign @@ -135,9 +135,9 @@ define <2 x half> @fabs_copysign_vector(<2 x half> %x, <2 x half> %y) { define half @fneg_copysign_flags(half %x, half %y) { ; CHECK-LABEL: @fneg_copysign_flags( -; CHECK-NEXT: [[COPYSIGN:%.*]] = call nnan nsz half @llvm.copysign.f16(half [[X:%.*]], half [[Y:%.*]]) -; CHECK-NEXT: [[FNEG_COPYSIGN:%.*]] = fneg ninf nsz half [[COPYSIGN]] -; CHECK-NEXT: ret half [[FNEG_COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz half [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call nsz half @llvm.copysign.f16(half [[X:%.*]], half [[TMP1]]) +; CHECK-NEXT: ret half [[TMP2]] ; %copysign = call nnan nsz half @llvm.copysign.f16(half %x, half %y) %fneg.copysign = fneg ninf nsz half %copysign @@ -159,9 +159,9 @@ define half @fneg_fabs_copysign_flags(half %x, half %y) { ; Make sure we don't break things by polluting copysign with nsz define half @fneg_nsz_copysign(half %x, half %y) { ; CHECK-LABEL: @fneg_nsz_copysign( -; CHECK-NEXT: [[COPYSIGN:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[Y:%.*]]) -; CHECK-NEXT: [[FNEG_COPYSIGN:%.*]] = fneg nsz half [[COPYSIGN]] -; CHECK-NEXT: ret half [[FNEG_COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg half [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[TMP1]]) +; CHECK-NEXT: ret half [[TMP2]] ; %copysign = call half 
@llvm.copysign.f16(half %x, half %y) %fneg.copysign = fneg nsz half %copysign diff --git a/llvm/test/Transforms/InstCombine/unordered-compare-and-ordered.ll b/llvm/test/Transforms/InstCombine/unordered-compare-and-ordered.ll index 36d7eb9..8ab1f13 100644 --- a/llvm/test/Transforms/InstCombine/unordered-compare-and-ordered.ll +++ b/llvm/test/Transforms/InstCombine/unordered-compare-and-ordered.ll @@ -480,8 +480,8 @@ define i1 @fcmp_ord_and_copysign_fneg_ueq(half %x, half %y, half %z) { define i1 @fcmp_ord_and_fneg_copysign_ueq(half %x, half %y, half %z) { ; CHECK-LABEL: @fcmp_ord_and_fneg_copysign_ueq( -; CHECK-NEXT: [[COPYSIGN_X_Y:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[Z:%.*]]) -; CHECK-NEXT: [[FNEG_COPYSIGN:%.*]] = fneg half [[COPYSIGN_X_Y]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg half [[Z:%.*]] +; CHECK-NEXT: [[FNEG_COPYSIGN:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[TMP1]]) ; CHECK-NEXT: [[ORD:%.*]] = fcmp ord half [[X]], 0xH0000 ; CHECK-NEXT: [[UEQ:%.*]] = fcmp ueq half [[FNEG_COPYSIGN]], [[Y:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and i1 [[ORD]], [[UEQ]] -- 2.7.4