case Intrinsic::rint:
// Constrained intrinsics can be folded if FP environment is known
// to compiler.
+ case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fmuladd:
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
case Intrinsic::experimental_constrained_round:
return false;
}
+/// Checks if the given intrinsic call, which evaluates to constant, is allowed
+/// to be folded.
+///
+/// \param CI Constrained intrinsic call.
+/// \param St Exception flags raised during constant evaluation.
+///
+/// \note As a side effect, when evaluation raised no exception but the
+/// exception behavior of \p CI is not "ignore", this marks the call as not
+/// accessing memory so that the otherwise side-effecting call may be removed.
+static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
+ APFloat::opStatus St) {
+ Optional<RoundingMode> ORM = CI->getRoundingMode();
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+
+ // If the operation does not change exception status flags, it is safe
+ // to fold.
+ if (St == APFloat::opStatus::opOK) {
+ // When FP exceptions are not ignored, intrinsic call will not be
+ // eliminated, because it is considered as having side effect. But we
+ // know that its evaluation does not raise exceptions, so side effect
+ // is absent. To allow removing the call, mark it as not accessing memory.
+ if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ return true;
+ }
+
+ // If evaluation raised FP exception, the result can depend on rounding
+ // mode. If the latter is unknown, folding is not possible.
+ if (!ORM || *ORM == RoundingMode::Dynamic)
+ return false;
+
+ // If FP exceptions are ignored, fold the call, even if such exception is
+ // raised.
+ if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
+ return true;
+
+ // Leave the calculation for runtime so that exception flags will be set
+ // correctly in hardware.
+ return false;
+}
+
+/// Returns the rounding mode that should be used for constant evaluation.
+/// Falls back to NearestTiesToEven when the call's rounding mode is unknown
+/// or dynamic.
+static RoundingMode
+getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
+ Optional<RoundingMode> ORM = CI->getRoundingMode();
+ if (!ORM || *ORM == RoundingMode::Dynamic)
+ // Even if the rounding mode is unknown, try evaluating the operation.
+ // If it does not raise the inexact exception, rounding was not applied,
+ // so the result is exact and does not depend on the rounding mode.
+ // Whether other FP exceptions are raised does not depend on the
+ // rounding mode either.
+ return RoundingMode::NearestTiesToEven;
+ return *ORM;
+}
+
static Constant *ConstantFoldScalarCall1(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
}
}
- if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isFloatingPointTy())
return nullptr;
APFloat Op1V = Op1->getValueAPF();
- if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return nullptr;
APFloat Op2V = Op2->getValueAPF();
+ if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+ RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+ APFloat Res = Op1V;
+ APFloat::opStatus St;
+ switch (IntrinsicID) {
+ default:
+ return nullptr;
+ case Intrinsic::experimental_constrained_fadd:
+ St = Res.add(Op2V, RM);
+ break;
+ case Intrinsic::experimental_constrained_fsub:
+ St = Res.subtract(Op2V, RM);
+ break;
+ case Intrinsic::experimental_constrained_fmul:
+ St = Res.multiply(Op2V, RM);
+ break;
+ case Intrinsic::experimental_constrained_fdiv:
+ St = Res.divide(Op2V, RM);
+ break;
+ case Intrinsic::experimental_constrained_frem:
+ St = Res.mod(Op2V);
+ break;
+ }
+ if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
+ St))
+ return ConstantFP::get(Ty->getContext(), Res);
+ return nullptr;
+ }
+
switch (IntrinsicID) {
default:
break;
break;
}
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return nullptr;
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
return ConstantFP::get(
Ty->getContext(),
const APFloat &C1 = Op1->getValueAPF();
const APFloat &C2 = Op2->getValueAPF();
const APFloat &C3 = Op3->getValueAPF();
+
+ if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+ RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+ APFloat Res = C1;
+ APFloat::opStatus St;
+ switch (IntrinsicID) {
+ default:
+ return nullptr;
+ case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fmuladd:
+ St = Res.fusedMultiplyAdd(C2, C3, RM);
+ break;
+ }
+ if (mayFoldConstrained(
+ const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
+ return ConstantFP::get(Ty->getContext(), Res);
+ return nullptr;
+ }
+
switch (IntrinsicID) {
default: break;
case Intrinsic::amdgcn_fma_legacy: {
ret double %result
}
+; An exact result with known rounding mode and ignored exceptions is folded.
+define float @fadd_01() #0 {
+; CHECK-LABEL: @fadd_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret float 3.000000e+01
+;
+entry:
+ %result = call float @llvm.experimental.constrained.fadd.f32(float 1.000000e+01, float 2.000000e+01, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret float %result
+}
+
+; An inexact result does not prevent folding if exceptions are ignored and
+; the rounding mode is known.
+define double @fadd_02() #0 {
+; CHECK-LABEL: @fadd_02(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 2.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; The known rounding mode is applied when computing the folded value (compare
+; with the round.tonearest result in fadd_02).
+define double @fadd_03() #0 {
+; CHECK-LABEL: @fadd_03(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 0x4000000000000001
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.upward", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; An inexact result prevents folding if exceptions may be checked.
+define double @fadd_04() #0 {
+; CHECK-LABEL: @fadd_04(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: ret double [[RESULT]]
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+; If the result is exact, folding is allowed even if exceptions may be checked.
+define double @fadd_05() #0 {
+; CHECK-LABEL: @fadd_05(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 3.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+; A dynamic rounding mode does not prevent folding if the result is exact.
+define double @fadd_06() #0 {
+; CHECK-LABEL: @fadd_06(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 3.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+; An inexact result prevents folding if the rounding mode is unknown.
+define double @fadd_07() #0 {
+; CHECK-LABEL: @fadd_07(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: ret double [[RESULT]]
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; An infinite result does not prevent folding unless exceptions are tracked.
+define double @fadd_08() #0 {
+; CHECK-LABEL: @fadd_08(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 0x7FF0000000000000
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; Overflow to infinity with strict exception behavior is not folded.
+define double @fadd_09() #0 {
+; CHECK-LABEL: @fadd_09(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: ret double [[RESULT]]
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+; Folding also works for half operands.
+define half @fadd_10() #0 {
+; CHECK-LABEL: @fadd_10(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret half 0xH4200
+;
+entry:
+ %result = call half @llvm.experimental.constrained.fadd.f16(half 1.0, half 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret half %result
+}
+
+; Folding also works for bfloat operands.
+define bfloat @fadd_11() #0 {
+; CHECK-LABEL: @fadd_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret bfloat 0xR4040
+;
+entry:
+ %result = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat 1.0, bfloat 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret bfloat %result
+}
+
+; Exact fsub result folds when exceptions are ignored.
+define double @fsub_01() #0 {
+; CHECK-LABEL: @fsub_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double -1.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fsub.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; Exact fmul result folds when exceptions are ignored.
+define double @fmul_01() #0 {
+; CHECK-LABEL: @fmul_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 2.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fmul.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; Exact fdiv result folds when exceptions are ignored.
+define double @fdiv_01() #0 {
+; CHECK-LABEL: @fdiv_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 5.000000e-01
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fdiv.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; The frem result here is exact, so the dynamic rounding mode does not block
+; folding.
+define double @frem_01() #0 {
+; CHECK-LABEL: @frem_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 1.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.frem.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; Exact fma result folds even with a dynamic rounding mode.
+define double @fma_01() #0 {
+; CHECK-LABEL: @fma_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 5.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fma.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
+; fmuladd is folded the same way as fma.
+define double @fmuladd_01() #0 {
+; CHECK-LABEL: @fmuladd_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 5.000000e+00
+;
+entry:
+ %result = call double @llvm.experimental.constrained.fmuladd.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ ret double %result
+}
+
attributes #0 = { strictfp }
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fadd.bf16(bfloat, bfloat, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)
+; 3.0 / 2.0 gives an exact result, so the call is folded even though the
+; exception behavior is strict.
define float @fdiv_constant_fold_strict() #0 {
; CHECK-LABEL: @fdiv_constant_fold_strict(
-; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
-; CHECK-NEXT: ret float [[F]]
+; CHECK-NEXT: ret float 1.500000e+00
;
 %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret float %f
}
+; 2.0 / 3.0 is inexact, so the strict call is not folded.
+define float @fdiv_constant_fold_strict2() #0 {
+; CHECK-LABEL: @fdiv_constant_fold_strict2(
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: ret float [[F]]
+;
+ %f = call float @llvm.experimental.constrained.fdiv.f32(float 2.0, float 3.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+
+ ret float %f
+}
+
+
define float @frem_constant_fold() #0 {
; CHECK-LABEL: @frem_constant_fold(
; CHECK-NEXT: ret float 1.000000e+00
+; 3.0 frem 2.0 gives an exact result, so even the strict call is folded.
+; (The test previously called fdiv by mistake; it now exercises frem.)
define float @frem_constant_fold_strict() #0 {
; CHECK-LABEL: @frem_constant_fold_strict(
-; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
-; CHECK-NEXT: ret float [[F]]
+; CHECK-NEXT: ret float 1.000000e+00
;
- %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ %f = call float @llvm.experimental.constrained.frem.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret float %f
}