From 4a5cb957a1e17aa9c2a7f33d6cd98e59956d1aa3 Mon Sep 17 00:00:00 2001
From: David Green
Date: Fri, 10 Jun 2022 09:36:09 +0100
Subject: [PATCH] [AggressiveInstcombine] Conditionally fold saturated fptosi
 to llvm.fptosi.sat

This adds a fold for aggressive instcombine that converts
smin(smax(fptosi(x))) into a llvm.fptosi.sat, providing that the saturation
constants are correct and the cost of the llvm.fptosi.sat is lower.

Unfortunately, a llvm.fptosi.sat cannot always be converted back to a
smin/smax/fptosi. The llvm.fptosi.sat intrinsic is more defined than the
original, which produces poison if the original fptosi was out of range. The
llvm.fptosi.sat will saturate any value, so it needs to be expanded to a
fptosi(fpmin(fpmax(x))), which can be worse for code generation depending on
the target. So this change is conditional on the backend reporting that the
llvm.fptosi.sat is cheaper than the original smin+smax+fptosi.

This is a change to the way that AggressiveInstCombine has worked in the
past. Instead of just being a canonicalization pass, that canonicalization
can be dependent on the target in certain specific cases.

Differential Revision: https://reviews.llvm.org/D125755
---
 .../AggressiveInstCombine.cpp                  |  80 +++++++-
 .../AggressiveInstCombine/AArch64/fptosisat.ll |  91 +++++----
 .../AggressiveInstCombine/ARM/fptosisat.ll     | 207 +++++++++++++++------
 3 files changed, 278 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 684da20..1fd8b88 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -365,10 +366,72 @@ static bool tryToRecognizePopCount(Instruction &I) {
   return false;
 }
 
+/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
+/// C2 saturate the value of the fp conversion. The transform is not
+/// reversible, as the fptosi.sat is more defined than the input - all values
+/// produce a valid result for the fptosi.sat, whereas values that were out of
+/// range of the integer conversion produce poison in the original. The
+/// reversed pattern may use fmax and fmin instead. As we cannot directly
+/// reverse the transform, and it is not always profitable, we make it
+/// conditional on the cost being reported as lower by TTI.
+static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
+  // Look for min(max(fptosi(x))), converting to fptosi_sat.
+  Value *In;
+  const APInt *MinC, *MaxC;
+  if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
+                                        m_APInt(MinC))),
+                        m_APInt(MaxC))) &&
+      !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
+                                        m_APInt(MaxC))),
+                        m_APInt(MinC))))
+    return false;
+
+  // Check that the constants clamp to a saturating range.
+  if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
+    return false;
+
+  Type *IntTy = I.getType();
+  Type *FpTy = In->getType();
+  Type *SatTy =
+      IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
+  if (auto *VecTy = dyn_cast<VectorType>(IntTy))
+    SatTy = VectorType::get(SatTy, VecTy->getElementCount());
+
+  // Get the cost of the intrinsic, and check that against the cost of
+  // fptosi+smin+smax
+  InstructionCost SatCost = TTI.getIntrinsicInstrCost(
+      IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
+      TTI::TCK_RecipThroughput);
+  SatCost += TTI.getCastInstrCost(Instruction::SExt, SatTy, IntTy,
+                                  TTI::CastContextHint::None,
+                                  TTI::TCK_RecipThroughput);
+
+  InstructionCost MinMaxCost = TTI.getCastInstrCost(
+      Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
+      TTI::TCK_RecipThroughput);
+  MinMaxCost += TTI.getIntrinsicInstrCost(
+      IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
+      TTI::TCK_RecipThroughput);
+  MinMaxCost += TTI.getIntrinsicInstrCost(
+      IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
+      TTI::TCK_RecipThroughput);
+
+  if (SatCost >= MinMaxCost)
+    return false;
+
+  IRBuilder<> Builder(&I);
+  Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat,
+                                           {SatTy, FpTy});
+  Value *Sat = Builder.CreateCall(Fn, In);
+  I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
+  return true;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
-static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
+static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
+                                TargetTransformInfo &TTI) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
@@ -384,6 +447,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
       MadeChange |= foldAnyOrAllBitsSet(I);
       MadeChange |= foldGuardedFunnelShift(I, DT);
       MadeChange |= tryToRecognizePopCount(I);
+      MadeChange |= tryToFPToSat(I, TTI);
     }
   }
 
@@ -397,13 +461,13 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
-static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI,
-                    DominatorTree &DT) {
+static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
+                    TargetLibraryInfo &TLI, DominatorTree &DT) {
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   TruncInstCombine TIC(AC, TLI, DL, DT);
   MadeChange |= TIC.run(F);
-  MadeChange |= foldUnusualPatterns(F, DT);
+  MadeChange |= foldUnusualPatterns(F, DT, TTI);
   return MadeChange;
 }
 
@@ -413,6 +477,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
   AU.addRequired<AssumptionCacheTracker>();
   AU.addRequired<DominatorTreeWrapperPass>();
   AU.addRequired<TargetLibraryInfoWrapperPass>();
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   AU.addPreserved<AAResultsWrapperPass>();
   AU.addPreserved<BasicAAWrapperPass>();
   AU.addPreserved<DominatorTreeWrapperPass>();
@@ -423,7 +488,8 @@ bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  return runImpl(F, AC, TLI, DT);
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  return runImpl(F, AC, TTI, TLI, DT);
 }
 
 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
@@ -431,7 +497,8 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  if (!runImpl(F, AC, TLI, DT)) {
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  if (!runImpl(F, AC, TTI, TLI, DT)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
   }
@@ -448,6 +515,7 @@ INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
                     "Combine pattern based expressions", false, false)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll
index fb616f4..a7eeb3b 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll
@@ -1,13 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -S | FileCheck %s
-; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -mattr=+fullfp16 -S | FileCheck %s
+; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP
+; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -mattr=+fullfp16 -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
 
 define i64 @f32_i32(float %in) {
 ; CHECK-LABEL: @f32_i32(
-; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64
-; CHECK-NEXT:    [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647)
-; CHECK-NEXT:    [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648)
-; CHECK-NEXT:    ret i64 [[MAX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %conv = fptosi float %in to i64
   %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
@@ -56,10 +55,9 @@ define i32 @f32_i8(float %in) {
 
 define i64 @f64_i32(double %in) {
 ; CHECK-LABEL: @f64_i32(
-; CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64
-; CHECK-NEXT:    [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]],
i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f64(double [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi double %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -94,11 +92,16 @@ define i32 @f64_i16(double %in) { } define i64 @f16_i32(half %in) { -; CHECK-LABEL: @f16_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-FP-LABEL: @f16_i32( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-FP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-FP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-FP-NEXT: ret i64 [[MAX]] +; +; CHECK-FP16-LABEL: @f16_i32( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP16-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -147,10 +150,9 @@ define i32 @f16_i8(half %in) { define <2 x i64> @v2f32_i32(<2 x float> %in) { ; CHECK-LABEL: @v2f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %conv = fptosi <2 x float> %in to <2 x i64> %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -160,10 +162,9 @@ define <2 x i64> @v2f32_i32(<2 x float> %in) { define <4 x i64> @v4f32_i32(<4 x float> %in) { ; CHECK-LABEL: @v4f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %conv = fptosi <4 x float> %in to <4 x i64> %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -173,10 +174,9 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { define <8 x i64> @v8f32_i32(<8 x float> %in) { ; CHECK-LABEL: @v8f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) -; CHECK-NEXT: ret <8 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %conv = fptosi <8 x float> %in to <8 x i64> %min = 
call <8 x i64> @llvm.smin.v8i64(<8 x i64> %conv, <8 x i64> ) @@ -185,11 +185,16 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) { } define <4 x i32> @v4f16_i16(<4 x half> %in) { -; CHECK-LABEL: @v4f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[MAX]] +; CHECK-FP-LABEL: @v4f16_i16( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> +; CHECK-FP-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) +; CHECK-FP-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-FP-NEXT: ret <4 x i32> [[MAX]] +; +; CHECK-FP16-LABEL: @v4f16_i16( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-FP16-NEXT: ret <4 x i32> [[TMP2]] ; %conv = fptosi <4 x half> %in to <4 x i32> %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -198,11 +203,16 @@ define <4 x i32> @v4f16_i16(<4 x half> %in) { } define <8 x i32> @v8f16_i16(<8 x half> %in) { -; CHECK-LABEL: @v8f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[MAX]] +; CHECK-FP-LABEL: @v8f16_i16( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> +; CHECK-FP-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) +; CHECK-FP-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-FP-NEXT: ret <8 x i32> [[MAX]] +; +; CHECK-FP16-LABEL: @v8f16_i16( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-FP16-NEXT: ret <8 x i32> [[TMP2]] ; %conv = fptosi <8 x half> %in to <8 x i32> %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) @@ -256,10 +266,9 @@ define i64 @f32_i32_invertedconsts(float %in) { define i64 @f32_i32_maxmin(float %in) { ; CHECK-LABEL: @f32_i32_maxmin( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[CONV]], i64 -2147483648) -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[MAX]], i64 2147483647) -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %max = call i64 @llvm.smax.i64(i64 %conv, i64 -2147483648) diff --git a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll index 6d2897b..f264638 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll @@ -1,14 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple 
thumbv8.1m.main-none-eabi -mattr=+mve.fp -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -S | FileCheck %s +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -S | FileCheck %s --check-prefixes=CHECK,CHECK-BASE +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp -S | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP64 define i64 @f32_i32(float %in) { -; CHECK-LABEL: @f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -17,11 +27,21 @@ define i64 @f32_i32(float %in) { } define i64 @f32_i31(float %in) { -; CHECK-LABEL: @f32_i31( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f32_i31( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f32_i31( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f32(float [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f32_i31( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f32(float [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 1073741823) @@ -56,11 +76,22 @@ define i32 @f32_i8(float %in) { } define i64 @f64_i32(double %in) { -; CHECK-LABEL: @f64_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f64_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) 
+; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f64_i32( +; CHECK-MVEFP-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 +; CHECK-MVEFP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-MVEFP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-MVEFP-NEXT: ret i64 [[MAX]] +; +; CHECK-FP64-LABEL: @f64_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f64(double [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi double %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -95,11 +126,21 @@ define i32 @f64_i16(double %in) { } define i64 @f16_i32(half %in) { -; CHECK-LABEL: @f16_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f16_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f16_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f16_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -108,11 +149,21 @@ define i64 @f16_i32(half %in) { } define i64 @f16_i31(half %in) { -; CHECK-LABEL: @f16_i31( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f16_i31( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f16_i31( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f16(half [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f16_i31( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f16(half [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 1073741823) @@ -147,11 +198,21 @@ define i32 @f16_i8(half %in) { } define <2 x i64> @v2f32_i32(<2 x float> %in) { -; CHECK-LABEL: @v2f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x 
i64> ) -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v2f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) +; CHECK-BASE-NEXT: ret <2 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v2f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-MVEFP-NEXT: ret <2 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v2f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-FP64-NEXT: ret <2 x i64> [[TMP2]] ; %conv = fptosi <2 x float> %in to <2 x i64> %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -160,11 +221,21 @@ define <2 x i64> @v2f32_i32(<2 x float> %in) { } define <4 x i64> @v4f32_i32(<4 x float> %in) { -; CHECK-LABEL: @v4f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v4f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) +; CHECK-BASE-NEXT: ret <4 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v4f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-MVEFP-NEXT: ret <4 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v4f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-FP64-NEXT: ret <4 x i64> [[TMP2]] ; %conv = fptosi <4 x float> %in to <4 x i64> %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -173,11 +244,21 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { } define <8 x i64> @v8f32_i32(<8 x float> %in) { -; CHECK-LABEL: @v8f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) -; CHECK-NEXT: ret <8 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v8f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) +; CHECK-BASE-NEXT: ret <8 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v8f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-MVEFP-NEXT: ret <8 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v8f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = 
call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-FP64-NEXT: ret <8 x i64> [[TMP2]] ; %conv = fptosi <8 x float> %in to <8 x i64> %min = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %conv, <8 x i64> ) @@ -186,11 +267,21 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) { } define <4 x i32> @v4f16_i16(<4 x half> %in) { -; CHECK-LABEL: @v4f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[MAX]] +; CHECK-BASE-LABEL: @v4f16_i16( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-BASE-NEXT: ret <4 x i32> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v4f16_i16( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-MVEFP-NEXT: ret <4 x i32> [[TMP2]] +; +; CHECK-FP64-LABEL: @v4f16_i16( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-FP64-NEXT: ret <4 x i32> [[TMP2]] ; %conv = fptosi <4 x half> %in to <4 x i32> %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -199,11 +290,21 @@ define <4 x i32> @v4f16_i16(<4 x half> %in) { } define <8 x i32> @v8f16_i16(<8 x half> %in) { -; CHECK-LABEL: @v8f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[MAX]] +; CHECK-BASE-LABEL: @v8f16_i16( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-BASE-NEXT: ret <8 x i32> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v8f16_i16( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-MVEFP-NEXT: ret <8 x i32> [[TMP2]] +; +; CHECK-FP64-LABEL: @v8f16_i16( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-FP64-NEXT: ret <8 x i32> [[TMP2]] ; %conv = fptosi <8 x half> %in to <8 x i32> %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) -- 2.7.4
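
As a rough sketch of the reverse direction described in the commit message
(expanding llvm.fptosi.sat back into fptosi(fpmin(fpmax(x))) on a target with
no native saturating conversion), the f64 -> i32 case could look something
like the IR below. This is an illustrative, hand-written expansion under that
assumption, not the lowering any particular backend emits; the function name
is made up, and maxnum/minnum plus a NaN select is just one way to recover the
saturating semantics (llvm.fptosi.sat returns 0 for NaN).

  declare double @llvm.maxnum.f64(double, double)
  declare double @llvm.minnum.f64(double, double)

  define i32 @fptosi_sat_i32_f64_expansion_sketch(double %x) {
    ; Clamp to [-2147483648, 2147483647] in the fp domain first; both bounds
    ; are exactly representable in f64, so the following fptosi cannot be out
    ; of range and cannot produce poison.
    %lo = call double @llvm.maxnum.f64(double %x, double -2147483648.0)
    %hi = call double @llvm.minnum.f64(double %lo, double 2147483647.0)
    %conv = fptosi double %hi to i32
    ; llvm.fptosi.sat defines NaN inputs to give 0, so handle that explicitly.
    %isnan = fcmp uno double %x, %x
    %res = select i1 %isnan, i32 0, i32 %conv
    ret i32 %res
  }

The extra fp clamp, compare and select are the reason the fold is only done
when TTI reports the intrinsic as cheaper than the original smin+smax+fptosi.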