; PC64LE-LABEL: constrained_vector_ceil_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE-NEXT: xsrdpip 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI103_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpip 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI103_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
; PC64LE-NEXT: addis 3, 2, .LCPI104_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI104_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpip 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI104_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI104_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpip 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
define <3 x float> @constrained_vector_ceil_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI105_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI105_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI105_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE-NEXT: xsrdpip 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI105_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE-NEXT: xsrdpip 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI105_3@toc@l
-; PC64LE-NEXT: xsrdpip 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI105_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI105_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE9-NEXT: xsrdpip 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI105_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI105_3@toc@l
-; PC64LE9-NEXT: xsrdpip 1, 1
-; PC64LE9-NEXT: xsrdpip 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI106_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI106_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI106_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpip 3, 1
-; PC64LE-NEXT: xvrdpip 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI106_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI106_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI106_1@toc@l
-; PC64LE9-NEXT: xsrdpip 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpip 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI106_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
; PC64LE-LABEL: constrained_vector_floor_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE-NEXT: xsrdpim 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI107_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpim 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI107_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
; PC64LE-NEXT: addis 3, 2, .LCPI108_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI108_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpim 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI108_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI108_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpim 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
define <3 x float> @constrained_vector_floor_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_floor_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI109_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI109_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI109_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE-NEXT: xsrdpim 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI109_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE-NEXT: xsrdpim 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI109_3@toc@l
-; PC64LE-NEXT: xsrdpim 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI109_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI109_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI109_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE9-NEXT: xsrdpim 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI109_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI109_3@toc@l
-; PC64LE9-NEXT: xsrdpim 1, 1
-; PC64LE9-NEXT: xsrdpim 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI109_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
define <3 x double> @constrained_vector_floor_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_floor_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI110_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI110_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI110_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpim 3, 1
-; PC64LE-NEXT: xvrdpim 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI110_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI110_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI110_1@toc@l
-; PC64LE9-NEXT: xsrdpim 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpim 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI110_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
; PC64LE-LABEL: constrained_vector_round_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE-NEXT: xsrdpi 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI111_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpi 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI111_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
; PC64LE-NEXT: addis 3, 2, .LCPI112_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI112_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpi 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI112_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI112_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpi 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
define <3 x float> @constrained_vector_round_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_round_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI113_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI113_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI113_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE-NEXT: xsrdpi 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI113_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE-NEXT: xsrdpi 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI113_3@toc@l
-; PC64LE-NEXT: xsrdpi 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI113_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI113_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE9-NEXT: xsrdpi 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI113_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI113_3@toc@l
-; PC64LE9-NEXT: xsrdpi 1, 1
-; PC64LE9-NEXT: xsrdpi 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
define <3 x double> @constrained_vector_round_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_round_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI114_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: addis 4, 2, .LCPI114_1@toc@ha
; PC64LE-NEXT: addis 3, 2, .LCPI114_0@toc@ha
+; PC64LE-NEXT: lfs 2, .LCPI114_1@toc@l(4)
; PC64LE-NEXT: lfs 1, .LCPI114_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpi 3, 1
-; PC64LE-NEXT: xvrdpi 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 3, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI114_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI114_0@toc@l(3)
+; PC64LE9-NEXT: lfs 1, .LCPI114_0@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI114_1@toc@l
-; PC64LE9-NEXT: xsrdpi 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpi 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 2, .LCPI114_1@toc@l(3)
+; PC64LE9-NEXT: fmr 3, 2
; PC64LE9-NEXT: blr
entry:
%round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
; PC64LE-LABEL: constrained_vector_trunc_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE-NEXT: xsrdpiz 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI115_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpiz 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI115_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
; PC64LE-NEXT: addis 3, 2, .LCPI116_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI116_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpiz 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI116_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI116_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpiz 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
define <3 x float> @constrained_vector_trunc_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI117_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI117_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI117_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE-NEXT: xsrdpiz 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI117_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE-NEXT: xsrdpiz 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI117_3@toc@l
-; PC64LE-NEXT: xsrdpiz 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI117_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI117_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI117_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE9-NEXT: xsrdpiz 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI117_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI117_3@toc@l
-; PC64LE9-NEXT: xsrdpiz 1, 1
-; PC64LE9-NEXT: xsrdpiz 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI117_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI118_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI118_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI118_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpiz 3, 1
-; PC64LE-NEXT: xvrdpiz 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI118_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI118_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI118_1@toc@l
-; PC64LE9-NEXT: xsrdpiz 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpiz 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI118_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
--- /dev/null
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+
+; Verify that floor(10.1) is folded to 10.0 when the exception behavior is 'ignore'.
+define double @floor_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.floor.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @floor_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that floor(-10.1) is folded to -11.0 when the exception behavior is not 'ignore'.
+define double @floor_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.floor.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @floor_02
+ ; CHECK: ret double -1.100000e+01
+}
+
+; Verify that ceil(10.1) is folded to 11.0 when the exception behavior is 'ignore'.
+define double @ceil_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.ceil.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @ceil_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that ceil(-10.1) is folded to -10.0 when the exception behavior is not 'ignore'.
+define double @ceil_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.ceil.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @ceil_02
+ ; CHECK: ret double -1.000000e+01
+}
+
+; Verify that trunc(10.1) is folded to 10.0 when the exception behavior is 'ignore'.
+define double @trunc_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @trunc_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that trunc(-10.1) is folded to -10.0 when the exception behavior is NOT 'ignore'.
+define double @trunc_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @trunc_02
+ ; CHECK: ret double -1.000000e+01
+}
+
+; Verify that round(10.5) is folded to 11.0 when the exception behavior is 'ignore'.
+define double @round_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.round.f64(
+ double 1.050000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @round_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that round(-10.5) is folded to -11.0 when the exception behavior is NOT 'ignore'.
+define double @round_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.round.f64(
+ double -1.050000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @round_02
+ ; CHECK: ret double -1.100000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 11.0 when the rounding mode is 'upward'.
+define double @nearbyint_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.upward",
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'downward'.
+define double @nearbyint_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.downward",
+ metadata !"fpexcept.maytrap") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_02
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'towardzero'.
+define double @nearbyint_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.towardzero",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_03
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'tonearest'.
+define double @nearbyint_04() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_04
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is NOT folded if the rounding mode is 'dynamic'.
+define double @nearbyint_05() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_05
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.nearbyint
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that trunc(SNAN) is NOT folded if the exception behavior mode is not 'ignore'.
+define double @nonfinite_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff4000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_01
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.trunc
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that trunc(SNAN) is folded to QNAN if the exception behavior mode is 'ignore'.
+define double @nonfinite_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff4000000000000,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_02
+ ; CHECK: ret double 0x7FF8000000000000
+}
+
+; Verify that trunc(QNAN) is folded even if the exception behavior mode is not 'ignore'.
+define double @nonfinite_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff8000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_03
+ ; CHECK: ret double 0x7FF8000000000000
+}
+
+; Verify that trunc(+Inf) is folded even if the exception behavior mode is not 'ignore'.
+define double @nonfinite_04() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff0000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_04
+ ; CHECK: ret double 0x7FF0000000000000
+}
+
+; Verify that rint(10) is folded to 10.0 when the rounding mode is 'tonearest'.
+define double @rint_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.000000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that rint(10.1) is NOT folded to 10.0 when the exception behavior is 'strict'.
+define double @rint_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.010000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_02
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.rint
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that rint(10.1) is folded to 10.0 when the exception behavior is not 'strict'.
+define double @rint_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.010000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.maytrap") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_03
+ ; CHECK: ret double 1.000000e+01
+}
+
+
+attributes #0 = { strictfp }
+
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.floor.f64(double, metadata)
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
+declare double @llvm.experimental.constrained.round.f64(double, metadata)
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+