From c048a02b5b26c21a2a891131e3d52c7ddbc3cf62 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 24 May 2020 09:30:19 -0400
Subject: [PATCH] [InstCombine] fold FP trunc into exact itofp

Similar to D79116 and rGbfd512160fe0 - if the first cast (integer to FP)
is exact, then the following fptrunc can be folded into it and we can
convert directly to the destination FP type because there is no double rounding.
---
 .../Transforms/InstCombine/InstCombineCasts.cpp    | 88 ++++++++++++----------
 llvm/test/Transforms/InstCombine/fptrunc.ll        |  5 +-
 2 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 44ca361..c68f9e8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1558,6 +1558,48 @@ static Type *getMinimumFPType(Value *V) {
   return V->getType();
 }
 
+/// Return true if the cast from integer to FP can be proven to be exact for all
+/// possible inputs (the conversion does not lose any precision).
+static bool isKnownExactCastIntToFP(CastInst &I) {
+  CastInst::CastOps Opcode = I.getOpcode();
+  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
+         "Unexpected cast");
+  Value *Src = I.getOperand(0);
+  Type *SrcTy = Src->getType();
+  Type *FPTy = I.getType();
+  bool IsSigned = Opcode == Instruction::SIToFP;
+  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
+
+  // Easy case - if the source integer type has no more value bits than the FP
+  // mantissa (the sign bit is not counted for sitofp), the cast must be exact.
+  int DestNumSigBits = FPTy->getFPMantissaWidth();
+  if (SrcSize <= DestNumSigBits)
+    return true;
+
+  // Cast from FP to integer and back to FP is independent of the intermediate
+  // integer width because of poison on overflow.
+  Value *F;
+  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
+    // If this is uitofp (fptosi F), the source needs an extra bit to avoid
+    // potential rounding of negative FP input values.
+    int SrcNumSigBits = F->getType()->getFPMantissaWidth();
+    if (!IsSigned && match(Src, m_FPToSI(m_Value())))
+      SrcNumSigBits++;
+
+    // [su]itofp (fpto[su]i F) --> exact if the source FP type has no more
+    // significant bits than the destination (and make sure neither type is
+    // weird -- ppc_fp128).
+    if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
+        SrcNumSigBits <= DestNumSigBits)
+      return true;
+  }
+
+  // TODO:
+  // Try harder to prove the source integer value has fewer significant bits.
+  // For example, compute number of sign bits or compute low bit mask.
+  return false;
+}
+
 Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
   if (Instruction *I = commonCastTransforms(FPT))
     return I;
@@ -1731,48 +1773,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
   if (Instruction *I = shrinkInsertElt(FPT, Builder))
     return I;
 
-  return nullptr;
-}
-
-/// Return true if the cast from integer to FP can be proven to be exact for all
-/// possible inputs (the conversion does not lose any precision).
-static bool isKnownExactCastIntToFP(CastInst &I) {
-  CastInst::CastOps Opcode = I.getOpcode();
-  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
-         "Unexpected cast");
-  Value *Src = I.getOperand(0);
-  Type *SrcTy = Src->getType();
-  Type *FPTy = I.getType();
-  bool IsSigned = Opcode == Instruction::SIToFP;
-  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
-
-  // Easy case - if the source integer type has less bits than the FP mantissa,
-  // then the cast must be exact.
-  int DestNumSigBits = FPTy->getFPMantissaWidth();
-  if (SrcSize <= DestNumSigBits)
-    return true;
-
-  // Cast from FP to integer and back to FP is independent of the intermediate
-  // integer width because of poison on overflow.
-  Value *F;
-  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
-    // If this is uitofp (fptosi F), the source needs an extra bit to avoid
-    // potential rounding of negative FP input values.
-    int SrcNumSigBits = F->getType()->getFPMantissaWidth();
-    if (!IsSigned && match(Src, m_FPToSI(m_Value())))
-      SrcNumSigBits++;
-
-    // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
-    // significant bits than the destination (and make sure neither type is
-    // weird -- ppc_fp128).
-    if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
-        SrcNumSigBits <= DestNumSigBits)
-      return true;
+  Value *Src = FPT.getOperand(0);
+  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
+    auto *FPCast = cast<CastInst>(Src);
+    if (isKnownExactCastIntToFP(*FPCast))
+      return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
-  // TODO:
-  // Try harder to find if the source integer type has less significant bits.
-  return false;
+  return nullptr;
 }
 
 Instruction *InstCombiner::visitFPExt(CastInst &FPExt) {
diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll
index 9c9ee11..c04c4b5 100644
--- a/llvm/test/Transforms/InstCombine/fptrunc.ll
+++ b/llvm/test/Transforms/InstCombine/fptrunc.ll
@@ -141,8 +141,7 @@ define float @fptrunc_select_true_val_type_mismatch_fast(half %x, double %y, i1
 
 define <2 x float> @ItoFtoF_s54_f64_f32(<2 x i54> %i) {
 ; CHECK-LABEL: @ItoFtoF_s54_f64_f32(
-; CHECK-NEXT:    [[X:%.*]] = sitofp <2 x i54> [[I:%.*]] to <2 x double>
-; CHECK-NEXT:    [[R:%.*]] = fptrunc <2 x double> [[X]] to <2 x float>
+; CHECK-NEXT:    [[R:%.*]] = sitofp <2 x i54> [[I:%.*]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %x = sitofp <2 x i54> %i to <2 x double>
@@ -157,7 +156,7 @@ define half @ItoFtoF_u24_f32_f16(i24 %i) {
 ; CHECK-LABEL: @ItoFtoF_u24_f32_f16(
 ; CHECK-NEXT:    [[X:%.*]] = uitofp i24 [[I:%.*]] to float
 ; CHECK-NEXT:    call void @use(float [[X]])
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[X]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i24 [[I]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = uitofp i24 %i to float
-- 
2.7.4