From: Roman Lebedev
Date: Wed, 18 Jul 2018 10:55:17 +0000 (+0000)
Subject: [InstCombine] Re-commit: Fold 'check for [no] signed truncation' pattern
X-Git-Tag: llvmorg-7.0.0-rc1~1154
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3cb87e905c033d3bc02599b3ed1728007abbe28a;p=platform%2Fupstream%2Fllvm.git

[InstCombine] Re-commit: Fold 'check for [no] signed truncation' pattern

Summary:
[[ https://bugs.llvm.org/show_bug.cgi?id=38149 | PR38149 ]]

As discussed in https://reviews.llvm.org/D49179#1158957 and later, the IR
for the 'check for [no] signed truncation' pattern can be improved:
https://rise4fun.com/Alive/gBf
^ that pattern will be produced by the Implicit Integer Truncation sanitizer
(https://reviews.llvm.org/D48958, https://bugs.llvm.org/show_bug.cgi?id=21530)
in the signed case, so it is probably a good idea to improve it.

The DAGCombine will reverse this transform; see https://reviews.llvm.org/D49266

This transform is surprisingly frustrating. It does not deal with non-splat
shift amounts, or with undef shift amounts. I've outlined what I think the
solution should be:
```
// Potential handling of non-splats: for each element:
//  * if both are undef, replace with constant 0.
//    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
//  * if both are not undef, and are different, bailout.
//  * else, only one is undef, then pick the non-undef one.
```

This is a re-commit: the original patch, committed in rL337190, was reverted
in rL337344 because it broke the Chromium build
(https://bugs.llvm.org/show_bug.cgi?id=38204 and https://crbug.com/864832).

Proofs that the fixed folds are ok: https://rise4fun.com/Alive/VYM

Differential Revision: https://reviews.llvm.org/D49320

llvm-svn: 337376
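For a concrete illustration of the fold (this simply mirrors the @p0 tests in
the diff below): with i8 and MaskedBits = 5, hence KeptBits = 3,

```
; Before: does sign-extending the low 3 bits of %x give back %x?
  %tmp0 = shl i8 %x, 5
  %tmp1 = ashr i8 %tmp0, 5
  %tmp2 = icmp eq i8 %tmp1, %x
; After: AddCst = 1 << (KeptBits-1) = 4, ICmpCst = 1 << KeptBits = 8.
  %tmp1 = add i8 %x, 4
  %tmp2 = icmp ult i8 %tmp1, 8
```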
---

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f91eb9d..6de92a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2945,6 +2945,81 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
   return Builder.CreateICmp(DstPred, X, M);
 }
 
+/// Some comparisons can be simplified.
+/// In this case, we are looking for comparisons that look like
+/// a check for a lossy signed truncation.
+/// Folds:   (MaskedBits is a constant.)
+///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
+/// Into:
+///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
+/// Where KeptBits = bitwidth(%x) - MaskedBits
+static Value *
+foldICmpWithTruncSignExtendedVal(ICmpInst &I,
+                                 InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate SrcPred;
+  Value *X;
+  const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
+  // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
+  if (!match(&I, m_c_ICmp(SrcPred,
+                          m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
+                                          m_APInt(C1))),
+                          m_Deferred(X))))
+    return nullptr;
+
+  // Potential handling of non-splats: for each element:
+  //  * if both are undef, replace with constant 0.
+  //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
+  //  * if both are not undef, and are different, bailout.
+  //  * else, only one is undef, then pick the non-undef one.
+
+  // The shift amount must be equal.
+  if (*C0 != *C1)
+    return nullptr;
+  const APInt &MaskedBits = *C0;
+  assert(MaskedBits != 0 && "shift by zero should be folded away already.");
+
+  ICmpInst::Predicate DstPred;
+  switch (SrcPred) {
+  case ICmpInst::Predicate::ICMP_EQ:
+    // ((%x << MaskedBits) a>> MaskedBits) == %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_ULT;
+    break;
+  case ICmpInst::Predicate::ICMP_NE:
+    // ((%x << MaskedBits) a>> MaskedBits) != %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_UGE;
+    break;
+  // FIXME: are more folds possible?
+  default:
+    return nullptr;
+  }
+
+  auto *XType = X->getType();
+  const unsigned XBitWidth = XType->getScalarSizeInBits();
+  const APInt BitWidth = APInt(XBitWidth, XBitWidth);
+  assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
+
+  // KeptBits = bitwidth(%x) - MaskedBits
+  const APInt KeptBits = BitWidth - MaskedBits;
+  assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
+  // ICmpCst = (1 << KeptBits)
+  const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
+  assert(ICmpCst.isPowerOf2());
+  // AddCst = (1 << (KeptBits-1))
+  const APInt AddCst = ICmpCst.lshr(1);
+  assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
+
+  // T0 = add %x, AddCst
+  Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
+  // T1 = T0 DstPred ICmpCst
+  Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
+
+  return T1;
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3285,6 +3360,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
   if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
+    return replaceInstUsesWith(I, V);
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
index 2e75ae6..60aa4d4 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
@@ -15,9 +15,8 @@
 
 define i1 @p0(i8 %x) {
 ; CHECK-LABEL: @p0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
@@ -29,9 +28,8 @@ define i1 @p0(i8 %x) {
 ; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
 define i1 @pb(i65 %x) {
 ; CHECK-LABEL: @pb(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i65 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i65 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i65 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i65 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i65 %x, 1
@@ -46,9 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 8, i8 8>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
@@ -118,9 +115,8 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %x = call i8 @gen8()
@@ -140,8 +136,8 @@ define i1 @n_oneuse0(i8 %x) {
 ; CHECK-LABEL: @n_oneuse0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
index 6cd32db..90d19be 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
@@ -15,9 +15,8 @@
 
 define i1 @p0(i8 %x) {
 ; CHECK-LABEL: @p0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
@@ -29,9 +28,8 @@ define i1 @p0(i8 %x) {
 ; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
 define i1 @pb(i65 %x) {
 ; CHECK-LABEL: @pb(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i65 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i65 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i65 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i65 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i65 %x, 1
@@ -46,9 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i8> [[TMP1]], <i8 7, i8 7>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
@@ -118,9 +115,8 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %x = call i8 @gen8()
@@ -140,8 +136,8 @@ define i1 @n_oneuse0(i8 %x) {
 ; CHECK-LABEL: @n_oneuse0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
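Appendix (editorial note, not part of the commit): a minimal standalone C++
sketch that exhaustively verifies the scalar i8 instance of the fold exercised
by the @p0 tests above, i.e. that ((x << 5) a>> 5) == x holds iff (x + 4) u< 8.
It assumes >> on a negative signed value is an arithmetic shift (true on
mainstream compilers; guaranteed since C++20); the file name is arbitrary,
e.g. c++ verify.cpp && ./a.out.

```
#include <cstdint>
#include <cstdio>

int main() {
  for (int i = 0; i < 256; ++i) {
    int8_t x = (int8_t)i;
    // Original pattern: ((%x << 5) a>> 5) == %x.
    int8_t shifted = (int8_t)((uint8_t)x << 5); // shl i8 %x, 5 (wraps mod 256)
    int8_t back = (int8_t)(shifted >> 5);       // ashr i8 %tmp0, 5
    bool original = (back == x);
    // Folded pattern: (add i8 %x, 4) u< 8; the uint8_t cast gives the
    // unsigned (modular) view of the i8 addition.
    bool folded = (uint8_t)(x + 4) < 8;
    if (original != folded) {
      printf("mismatch at x = %d\n", (int)x);
      return 1;
    }
  }
  printf("fold verified for all i8 values (MaskedBits = 5)\n");
  return 0;
}
```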