From bec15b6516466a1f3eeb2a52e5584b5d4f5b713e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Wed, 22 Aug 2018 23:27:50 +0000
Subject: [PATCH] [ValueTracking] Teach computeNumSignBits to understand
 min/max clamp patterns with constant/splat values

If we have a min/max pair we can do a better job of counting sign bits if we look at them together. This is similar to what is done in the SelectionDAG version of computeNumSignBits for ISD::SMAX/SMIN.

Differential Revision: https://reviews.llvm.org/D51112

llvm-svn: 340480
---
 llvm/lib/Analysis/ValueTracking.cpp                | 38 ++++++++++-
 llvm/test/Transforms/InstCombine/max_known_bits.ll | 78 ++++++++++++++++++++++
 2 files changed, 115 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 48d48f2..322d5c2 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2209,6 +2209,34 @@ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
   return Mask.isSubsetOf(Known.Zero);
 }
 
+/// Match a signed clamp: smax(smin(In, CHigh), CLow) or the reverse nesting
+/// smin(smax(In, CLow), CHigh). On success sets In/CLow/CHigh; CLow <= CHigh.
+static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
+                                const APInt *&CLow, const APInt *&CHigh) {
+  // A select may also be a ConstantExpr, so test the opcode, not the class.
+  assert(Operator::getOpcode(Select) == Instruction::Select && "Not a select!");
+
+  const Value *LHS, *RHS, *LHS2, *RHS2;
+  SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
+  if (SPF != SPF_SMAX && SPF != SPF_SMIN)
+    return false;
+
+  // The outer bound must be a constant or splat; provisionally call it CLow.
+  if (!match(RHS, m_APInt(CLow)))
+    return false;
+
+  // The inner operation must be the opposite flavor with a constant bound.
+  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
+  if (getInverseMinMaxFlavor(SPF) != SPF2 || !match(RHS2, m_APInt(CHigh)))
+    return false;
+
+  // An outer smin means the outer constant was really the upper bound.
+  if (SPF == SPF_SMIN)
+    std::swap(CLow, CHigh);
+  In = LHS2;
+  return CLow->sle(*CHigh);
+}
+
 /// For vector constants, loop over the elements and find the constant with the
 /// minimum number of sign bits. Return 0 if the value is not a vector constant
 /// or if any element was not analyzed; otherwise, return the count for the
@@ -2370,11 +2398,19 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
     }
     break;
 
-  case Instruction::Select:
+  case Instruction::Select: {
+    // If we have a clamp pattern, the result lies in [CLow, CHigh], so it has
+    // at least as many sign bits as the bound with the fewest sign bits.
+    const Value *X;
+    const APInt *CLow, *CHigh;
+    if (isSignedMinMaxClamp(U, X, CLow, CHigh))
+      return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+
     Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
     if (Tmp == 1) break;
     Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
     return std::min(Tmp, Tmp2);
+  }
 
   case Instruction::Add:
     // Add can have at most one carry bit.  Thus we know that the output
diff --git a/llvm/test/Transforms/InstCombine/max_known_bits.ll b/llvm/test/Transforms/InstCombine/max_known_bits.ll
index 8733239..2f7ee6b 100644
--- a/llvm/test/Transforms/InstCombine/max_known_bits.ll
+++ b/llvm/test/Transforms/InstCombine/max_known_bits.ll
@@ -17,3 +17,81 @@ define i16 @foo(i16 %x)  {
   ret i16 %t6
 }
 
+; Clamp %x to the signed 12-bit range [-2048, 2047] with an smax then an smin.
+; By analyzing the clamp pattern, we can tell the add of 1 cannot overflow (nsw).
+define i16 @min_max_clamp(i16 %x) {
+; CHECK-LABEL: @min_max_clamp(
+; CHECK-NEXT:    [[A:%.*]] = icmp sgt i16 [[X:%.*]], -2048
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 -2048
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i16 [[B]], 2047
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 2047
+; CHECK-NEXT:    [[E:%.*]] = add nsw i16 [[D]], 1
+; CHECK-NEXT:    ret i16 [[E]]
+;
+  %a = icmp sgt i16 %x, -2048
+  %b = select i1 %a, i16 %x, i16 -2048
+  %c = icmp slt i16 %b, 2047
+  %d = select i1 %c, i16 %b, i16 2047
+  %e = add i16 %d, 1
+  ret i16 %e
+}
+
+; Same as @min_max_clamp, but with the smin applied before the smax.
+define i16 @min_max_clamp_2(i16 %x) {
+; CHECK-LABEL: @min_max_clamp_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp slt i16 [[X:%.*]], 2047
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 2047
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i16 [[B]], -2048
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 -2048
+; CHECK-NEXT:    [[E:%.*]] = add nsw i16 [[D]], 1
+; CHECK-NEXT:    ret i16 [[E]]
+;
+  %a = icmp slt i16 %x, 2047
+  %b = select i1 %a, i16 %x, i16 2047
+  %c = icmp sgt i16 %b, -2048
+  %d = select i1 %c, i16 %b, i16 -2048
+  %e = add i16 %d, 1
+  ret i16 %e
+}
+
+; Clamp %x to the signed 12-bit range [-2048, 2047] with an smax then an smin.
+; Because the clamped value plus 1 cannot overflow i16, the add of -1 can be
+; moved before the sext, where it cancels the add of 1 (see the CHECK lines).
+define i32 @min_max_clamp_3(i16 %x) {
+; CHECK-LABEL: @min_max_clamp_3(
+; CHECK-NEXT:    [[A:%.*]] = icmp sgt i16 [[X:%.*]], -2048
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 -2048
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i16 [[B]], 2047
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 2047
+; CHECK-NEXT:    [[G:%.*]] = sext i16 [[D]] to i32
+; CHECK-NEXT:    ret i32 [[G]]
+;
+  %a = icmp sgt i16 %x, -2048
+  %b = select i1 %a, i16 %x, i16 -2048
+  %c = icmp slt i16 %b, 2047
+  %d = select i1 %c, i16 %b, i16 2047
+  %e = add i16 %d, 1
+  %f = sext i16 %e to i32
+  %g = add i32 %f, -1
+  ret i32 %g
+}
+
+; Same as @min_max_clamp_3, but with the smin applied before the smax.
+define i32 @min_max_clamp_4(i16 %x) {
+; CHECK-LABEL: @min_max_clamp_4(
+; CHECK-NEXT:    [[A:%.*]] = icmp slt i16 [[X:%.*]], 2047
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 2047
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i16 [[B]], -2048
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 -2048
+; CHECK-NEXT:    [[G:%.*]] = sext i16 [[D]] to i32
+; CHECK-NEXT:    ret i32 [[G]]
+;
+  %a = icmp slt i16 %x, 2047
+  %b = select i1 %a, i16 %x, i16 2047
+  %c = icmp sgt i16 %b, -2048
+  %d = select i1 %c, i16 %b, i16 -2048
+  %e = add i16 %d, 1
+  %f = sext i16 %e to i32
+  %g = add i32 %f, -1
+  ret i32 %g
+}
-- 
2.7.4