From e09115bcfdeb295c943773abfa2b29ff4e2dba11 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 22 Apr 2023 22:05:23 -0400 Subject: [PATCH] InstCombine: Try to turn is.fpclass sign checks to fcmp with 0 Try to use gt/lt compares with 0 instead of class. --- llvm/include/llvm/IR/InstrTypes.h | 11 ++ .../Transforms/InstCombine/InstCombineCalls.cpp | 115 ++++++++++++++++----- llvm/test/Transforms/InstCombine/is_fpclass.ll | 30 +++--- 3 files changed, 113 insertions(+), 43 deletions(-) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 53aff55..9ed794d 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -837,6 +837,17 @@ public: return getOrderedPredicate(getPredicate()); } + /// Returns the unordered variant of a floating point compare. + /// + /// For example, OEQ -> UEQ, OLT -> ULT, OGT -> UGT + static Predicate getUnorderedPredicate(Predicate Pred) { + return static_cast<Predicate>(Pred | FCMP_UNO); + } + + Predicate getUnorderedPredicate() const { + return getUnorderedPredicate(getPredicate()); + } + /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE, /// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc. /// @returns the inverse predicate for predicate provided in \p pred. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index fb67e9f..73c0e35 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -813,22 +813,75 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { return nullptr; } -/// \returns true if the test performed by llvm.is.fpclass(x, \p Mask) is -/// equivalent to fcmp oeq x, 0.0 with the floating-point environment assumed -/// for \p F for type \p Ty -static bool fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty) { - if (Mask == fcZero) - return F.getDenormalMode(Ty->getScalarType()->getFltSemantics()).Input == - DenormalMode::IEEE; +static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { + Ty = Ty->getScalarType(); + return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE; +} + +static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) { + Ty = Ty->getScalarType(); + return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero(); +} - if (Mask == (fcZero | fcSubnormal)) { - DenormalMode::DenormalModeKind InputMode = - F.getDenormalMode(Ty->getScalarType()->getFltSemantics()).Input; - return InputMode == DenormalMode::PreserveSign || - InputMode == DenormalMode::PositiveZero; +/// \returns the compare predicate type if the test performed by +/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the +/// floating-point environment assumed for \p F for type \p Ty +static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, + const Function &F, Type *Ty) { + switch (static_cast<unsigned>(Mask)) { + case fcZero: + if (inputDenormalIsIEEE(F, Ty)) + return FCmpInst::FCMP_OEQ; + break; + case fcZero | fcSubnormal: + if (inputDenormalIsDAZ(F, Ty)) + return FCmpInst::FCMP_OEQ; + break; + case fcPositive | fcNegZero: + if (inputDenormalIsIEEE(F, Ty)) + return FCmpInst::FCMP_OGE; + break; + case 
fcPositive | fcNegZero | fcNegSubnormal: + if (inputDenormalIsDAZ(F, Ty)) + return FCmpInst::FCMP_OGE; + break; + case fcPosSubnormal | fcPosNormal | fcPosInf: + if (inputDenormalIsIEEE(F, Ty)) + return FCmpInst::FCMP_OGT; + break; + case fcNegative | fcPosZero: + if (inputDenormalIsIEEE(F, Ty)) + return FCmpInst::FCMP_OLE; + break; + case fcNegative | fcPosZero | fcPosSubnormal: + if (inputDenormalIsDAZ(F, Ty)) + return FCmpInst::FCMP_OLE; + break; + case fcNegSubnormal | fcNegNormal | fcNegInf: + if (inputDenormalIsIEEE(F, Ty)) + return FCmpInst::FCMP_OLT; + break; + case fcPosNormal | fcPosInf: + if (inputDenormalIsDAZ(F, Ty)) + return FCmpInst::FCMP_OGT; + break; + case fcNegNormal | fcNegInf: + if (inputDenormalIsDAZ(F, Ty)) + return FCmpInst::FCMP_OLT; + break; + case ~fcZero & ~fcNan: + if (inputDenormalIsIEEE(F, Ty)) + return FCmpInst::FCMP_ONE; + break; + case ~(fcZero | fcSubnormal) & ~fcNan: + if (inputDenormalIsDAZ(F, Ty)) + return FCmpInst::FCMP_ONE; + break; + default: + break; } - return false; + return FCmpInst::BAD_FCMP_PREDICATE; } Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) { @@ -905,24 +958,30 @@ Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) { return replaceInstUsesWith(II, FCmp); } - if (!IsStrict && (IsOrdered || IsUnordered) && - fpclassTestIsFCmp0(OrderedMask, *II.getFunction(), Src0->getType())) { - Constant *Zero = ConstantFP::getZero(Src0->getType()); - // Equivalent of == 0. - Value *FCmp = IsUnordered ? 
Builder.CreateFCmpUEQ(Src0, Zero) - : Builder.CreateFCmpOEQ(Src0, Zero); - FCmp->takeName(&II); - return replaceInstUsesWith(II, FCmp); - } + FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE; + // Try to replace with an fcmp with 0 + // + // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0 + // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0 + // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0 + // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0 + // + // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0 + // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0 + // + // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0 + // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0 + // if (!IsStrict && (IsOrdered || IsUnordered) && - fpclassTestIsFCmp0(OrderedInvertedMask, *II.getFunction(), - Src0->getType())) { + (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(), + Src0->getType())) != + FCmpInst::BAD_FCMP_PREDICATE) { Constant *Zero = ConstantFP::getZero(Src0->getType()); - - // Equivalent of !(x == 0). - Value *FCmp = IsUnordered ? Builder.CreateFCmpUNE(Src0, Zero) - : Builder.CreateFCmpONE(Src0, Zero); + // Compare with 0 using the matched predicate. + Value *FCmp = Builder.CreateFCmp( + IsUnordered ? 
FCmpInst::getUnorderedPredicate(PredType) : PredType, + Src0, Zero); FCmp->takeName(&II); return replaceInstUsesWith(II, FCmp); diff --git a/llvm/test/Transforms/InstCombine/is_fpclass.ll b/llvm/test/Transforms/InstCombine/is_fpclass.ll index 95c4178..bb76fdf 100644 --- a/llvm/test/Transforms/InstCombine/is_fpclass.ll +++ b/llvm/test/Transforms/InstCombine/is_fpclass.ll @@ -2790,7 +2790,7 @@ define i1 @test_class_is_pzero_psub_pnorm_pinf_nan__ieee(float %arg) #0 { define i1 @test_class_is_psub_pnorm_pinf__ieee(float %arg) #0 { ; CHECK-LABEL: @test_class_is_psub_pnorm_pinf__ieee( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 896) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp ogt float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 896) @@ -2817,7 +2817,7 @@ define i1 @test_class_is_psub_pnorm_pinf_qnan__ieee(float %arg) #0 { define i1 @test_class_is_psub_pnorm_pinf_nan__ieee(float %arg) #0 { ; CHECK-LABEL: @test_class_is_psub_pnorm_pinf_nan__ieee( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 899) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp ugt float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 899) @@ -2853,7 +2853,7 @@ define i1 @test_class_is_pzero_pnorm_pinf_nan__ieee(float %arg) #0 { define i1 @test_class_is_nzero_pzero_psub_pnorm_pinf__ieee(float %arg) #0 { ; CHECK-LABEL: @test_class_is_nzero_pzero_psub_pnorm_pinf__ieee( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 992) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp oge float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 992) @@ -2880,7 +2880,7 @@ define i1 @test_class_is_nzero_pzero_psub_pnorm_pinf_qnan__ieee(float %arg) #0 { define i1 @test_class_is_nzero_pzero_psub_pnorm_pinf_nan__ieee(float %arg) #0 { ; CHECK-LABEL: 
@test_class_is_nzero_pzero_psub_pnorm_pinf_nan__ieee( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 995) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp uge float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 995) @@ -2980,7 +2980,7 @@ define i1 @test_class_is_not_pzero_psub_pnorm_pinf_nan__ieee(float %arg) #0 { define i1 @test_class_is_not_psub_pnorm_pinf__ieee(float %arg) #0 { ; CHECK-LABEL: @test_class_is_not_psub_pnorm_pinf__ieee( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 127) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp ule float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 127) @@ -3007,7 +3007,7 @@ define i1 @test_class_is_not_psub_pnorm_pinf_qnan__ieee(float %arg) #0 { define i1 @test_class_is_not_psub_pnorm_pinf_nan__ieee(float %arg) #0 { ; CHECK-LABEL: @test_class_is_not_psub_pnorm_pinf_nan__ieee( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 124) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp ole float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 124) @@ -3070,7 +3070,7 @@ define i1 @test_class_is_not_nzero_pzero_psub_pnorm_pinf_qnan__ieee(float %arg) define i1 @test_class_is_not_nzero_pzero_psub_pnorm_pinf_nan__ieee(float %arg) #0 { ; CHECK-LABEL: @test_class_is_not_nzero_pzero_psub_pnorm_pinf_nan__ieee( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 28) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp olt float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 28) @@ -3209,7 +3209,7 @@ define i1 @test_class_is_psub_pnorm_pinf_nan__daz(float %arg) #1 { define i1 @test_class_is_pnorm_pinf__daz(float %arg) #1 { ; CHECK-LABEL: @test_class_is_pnorm_pinf__daz( -; CHECK-NEXT: [[CLASS:%.*]] = 
call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 768) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp ogt float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 768) @@ -3299,7 +3299,7 @@ define i1 @test_class_is_nsub_nzero_pzero_psub_pnorm_pinf_qnan__daz(float %arg) define i1 @test_class_is_nsub_nzero_pzero_psub_pnorm_pinf_nan__daz(float %arg) #1 { ; CHECK-LABEL: @test_class_is_nsub_nzero_pzero_psub_pnorm_pinf_nan__daz( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 1011) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp uge float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 1011) @@ -3399,7 +3399,7 @@ define i1 @test_class_is_not_psub_pnorm_pinf_nan__daz(float %arg) #1 { define i1 @test_class_is_not_pnorm_pinf__daz(float %arg) #1 { ; CHECK-LABEL: @test_class_is_not_pnorm_pinf__daz( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 255) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp ule float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 255) @@ -3489,7 +3489,7 @@ define i1 @test_class_is_not_nsub_nzero_pzero_psub_pnorm_pinf_qnan__daz(float %a define i1 @test_class_is_not_nsub_nzero_pzero_psub_pnorm_pinf_nan__daz(float %arg) #1 { ; CHECK-LABEL: @test_class_is_not_nsub_nzero_pzero_psub_pnorm_pinf_nan__daz( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 12) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp olt float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 12) @@ -3592,7 +3592,7 @@ define i1 @test_class_is_psub_pnorm_pinf_nan__dapz(float %arg) #2 { define i1 @test_class_is_pnorm_pinf__dapz(float %arg) #2 { ; CHECK-LABEL: @test_class_is_pnorm_pinf__dapz( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 768) +; 
CHECK-NEXT: [[CLASS:%.*]] = fcmp ogt float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 768) @@ -3682,7 +3682,7 @@ define i1 @test_class_is_nsub_nzero_pzero_psub_pnorm_pinf_qnan__dapz(float %arg) define i1 @test_class_is_nsub_nzero_pzero_psub_pnorm_pinf_nan__dapz(float %arg) #2 { ; CHECK-LABEL: @test_class_is_nsub_nzero_pzero_psub_pnorm_pinf_nan__dapz( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 1011) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp uge float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 1011) @@ -3782,7 +3782,7 @@ define i1 @test_class_is_not_psub_pnorm_pinf_nan__dapz(float %arg) #2 { define i1 @test_class_is_not_pnorm_pinf__dapz(float %arg) #2 { ; CHECK-LABEL: @test_class_is_not_pnorm_pinf__dapz( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 255) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp ule float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 255) @@ -3872,7 +3872,7 @@ define i1 @test_class_is_not_nsub_nzero_pzero_psub_pnorm_pinf_qnan__dapz(float % define i1 @test_class_is_not_nsub_nzero_pzero_psub_pnorm_pinf_nan__dapz(float %arg) #2 { ; CHECK-LABEL: @test_class_is_not_nsub_nzero_pzero_psub_pnorm_pinf_nan__dapz( -; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 12) +; CHECK-NEXT: [[CLASS:%.*]] = fcmp olt float [[ARG:%.*]], 0.000000e+00 ; CHECK-NEXT: ret i1 [[CLASS]] ; %class = call i1 @llvm.is.fpclass.f32(float %arg, i32 12) -- 2.7.4