From 20da7e467aff1949397a8d3ff56eb5df8424e0c8 Mon Sep 17 00:00:00 2001
From: Max Kazantsev <max.kazantsev@azul.com>
Date: Fri, 6 Jul 2018 04:04:13 +0000
Subject: [PATCH] Revert "[InstCombine] Delay foldICmpUsingKnownBits until
 simple transforms are done"

llvm-svn: 336410
---
 .../Transforms/InstCombine/InstCombineCompares.cpp | 10 +++------
 .../ValueTracking/non-negative-phi-bits.ll         |  2 +-
 llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll   | 24 ++++++++++++++++------
 llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll | 21 +++++++++----------
 .../icmp_sdiv_with_and_without_range.ll            |  6 +++---
 llvm/test/Transforms/InstCombine/max_known_bits.ll |  4 +---
 .../LoopVectorize/X86/masked_load_store.ll         |  3 ++-
 7 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 41e4124..d52ea2c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4485,6 +4485,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpWithConstant(I))
     return Res;
 
+  if (Instruction *Res = foldICmpUsingKnownBits(I))
+    return Res;
+
   // Test if the ICmpInst instruction is used exclusively by a select as
   // part of a minimum or maximum operation. If so, refrain from doing
   // any other folding. This helps out other analyses which understand
@@ -4703,13 +4706,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
       return foldICmpAddOpConst(X, Cst, I.getSwappedPredicate());
   }
-
-  // This may be expensive in compile-time, and transforms based on known bits
-  // can make further analysis more difficult, so we use it as the last resort
-  // if we cannot do anything better.
-  if (Instruction *Res = foldICmpUsingKnownBits(I))
-    return Res;
-
   return Changed ? &I : nullptr;
 }
 
diff --git a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll
index 3b1c43d..059bbaa 100755
--- a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll
+++ b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll
@@ -8,7 +8,7 @@ define void @test() #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 39
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 40
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
index 942790c..ba05302 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
@@ -69,9 +69,11 @@ define <2 x i1> @icmp_shl_nsw_eq_vec(<2 x i32> %x) {
 ; icmp sgt with shl nsw with a constant compare operand and constant
 ; shift amount can always be reduced to icmp sgt alone.
 
+; Known bits analysis turns this into an equality predicate.
+
 define i1 @icmp_sgt1(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt1(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 %x, -64
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -142,10 +144,11 @@ define i1 @icmp_sgt7(i8 %x) {
   ret i1 %cmp
 }
 
+; Known bits analysis turns this into an equality predicate.
 
 define i1 @icmp_sgt8(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt8(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, 62
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, 63
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -155,9 +158,11 @@ define i1 @icmp_sgt8(i8 %x) {
 
 ; Compares with 126 and 127 are recognized as always false.
 
+; Known bits analysis turns this into an equality predicate.
+
 define i1 @icmp_sgt9(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt9(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 %x, -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 7
@@ -205,9 +210,11 @@ define <2 x i1> @icmp_sgt11_vec(<2 x i8> %x) {
 ;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+; Known bits analysis turns this into an equality predicate.
+
 define i1 @icmp_sle1(i8 %x) {
 ; CHECK-LABEL: @icmp_sle1(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, -63
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, -64
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -278,9 +285,11 @@ define i1 @icmp_sle7(i8 %x) {
   ret i1 %cmp
 }
 
+; Known bits analysis turns this into an equality predicate.
+
 define i1 @icmp_sle8(i8 %x) {
 ; CHECK-LABEL: @icmp_sle8(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 63
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 %x, 63
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -290,9 +299,11 @@ define i1 @icmp_sle8(i8 %x) {
 
 ; Compares with 126 and 127 are recognized as always true.
 
+; Known bits analysis turns this into an equality predicate.
+
 define i1 @icmp_sle9(i8 %x) {
 ; CHECK-LABEL: @icmp_sle9(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 7
@@ -342,3 +353,4 @@ define i1 @icmp_ne1(i8 %x) {
   %cmp = icmp ne i8 %shl, -128
   ret i1 %cmp
 }
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
index a43edfa..bf1a031 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i1 @lshrugt_01_00(i4 %x) {
@@ -1835,7 +1834,7 @@ define i1 @lshrugt_01_05_exact(i4 %x) {
 
 define i1 @lshrugt_01_06_exact(i4 %x) {
 ; CHECK-LABEL: @lshrugt_01_06_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i4 %x, -4
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i4 %x, -2
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = lshr exact i4 %x, 1
@@ -1946,7 +1945,7 @@ define i1 @lshrugt_02_01_exact(i4 %x) {
 
 define i1 @lshrugt_02_02_exact(i4 %x) {
 ; CHECK-LABEL: @lshrugt_02_02_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i4 %x, -8
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i4 %x, -4
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = lshr exact i4 %x, 2
@@ -2227,7 +2226,7 @@ define i1 @lshrult_01_00_exact(i4 %x) {
 
 define i1 @lshrult_01_01_exact(i4 %x) {
 ; CHECK-LABEL: @lshrult_01_01_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i4 %x, 2
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i4 %x, 0
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = lshr exact i4 %x, 1
@@ -2287,7 +2286,7 @@ define i1 @lshrult_01_06_exact(i4 %x) {
 
 define i1 @lshrult_01_07_exact(i4 %x) {
 ; CHECK-LABEL: @lshrult_01_07_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i4 %x, -2
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i4 %x, -2
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = lshr exact i4 %x, 1
@@ -2378,7 +2377,7 @@ define i1 @lshrult_02_00_exact(i4 %x) {
 
 define i1 @lshrult_02_01_exact(i4 %x) {
 ; CHECK-LABEL: @lshrult_02_01_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i4 %x, 4
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i4 %x, 0
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = lshr exact i4 %x, 2
@@ -2398,7 +2397,7 @@ define i1 @lshrult_02_02_exact(i4 %x) {
 
 define i1 @lshrult_02_03_exact(i4 %x) {
 ; CHECK-LABEL: @lshrult_02_03_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i4 %x, -4
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i4 %x, -4
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = lshr exact i4 %x, 2
@@ -2525,7 +2524,7 @@ define i1 @lshrult_03_00_exact(i4 %x) {
 
 define i1 @lshrult_03_01_exact(i4 %x) {
 ; CHECK-LABEL: @lshrult_03_01_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i4 %x, -8
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = lshr exact i4 %x, 3
@@ -2802,7 +2801,7 @@ define i1 @ashrsgt_01_14_exact(i4 %x) {
 
 define i1 @ashrsgt_01_15_exact(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_01_15_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -2
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr exact i4 %x, 1
@@ -2949,7 +2948,7 @@ define i1 @ashrsgt_02_14_exact(i4 %x) {
 
 define i1 @ashrsgt_02_15_exact(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_02_15_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -4
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr exact i4 %x, 2
@@ -3094,7 +3093,7 @@ define i1 @ashrsgt_03_14_exact(i4 %x) {
 
 define i1 @ashrsgt_03_15_exact(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_03_15_exact(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i4 %x, -8
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr exact i4 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll b/llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll
index 5b9b0b4..174c4b9 100644
--- a/llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll
+++ b/llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll
@@ -2,8 +2,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ; Test that presence of range does not cause unprofitable transforms with bit
-; arithmetics. InstCombine needs to be smart about dealing with range-annotated
-; values.
+; arithmetic, and that instcombine behaves exactly the same as without the range.
 
 define i1 @without_range(i32* %A) {
 ; CHECK-LABEL: @without_range(
@@ -20,7 +19,8 @@ define i1 @without_range(i32* %A) {
 define i1 @with_range(i32* %A) {
 ; CHECK-LABEL: @with_range(
 ; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A:%.*]], align 8, !range !0
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[A_VAL]], 2
+; CHECK-NEXT:    [[B_MASK:%.*]] = and i32 [[A_VAL]], 2147483646
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[B_MASK]], 0
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %A.val = load i32, i32* %A, align 8, !range !0
diff --git a/llvm/test/Transforms/InstCombine/max_known_bits.ll b/llvm/test/Transforms/InstCombine/max_known_bits.ll
index 9dca481..8733239 100644
--- a/llvm/test/Transforms/InstCombine/max_known_bits.ll
+++ b/llvm/test/Transforms/InstCombine/max_known_bits.ll
@@ -6,9 +6,7 @@
 define i16 @foo(i16 %x)  {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    [[T1:%.*]] = and i16 [[X:%.*]], 255
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i16 [[T1]], 255
-; CHECK-NEXT:    [[T4:%.*]] = select i1 [[T3]], i16 [[T1]], i16 255
-; CHECK-NEXT:    ret i16 [[T4]]
+; CHECK-NEXT:    ret i16 [[T1]]
 ;
   %t1 = and i16 %x, 255
   %t2 = zext i16 %t1 to i32
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index b4bd5c5..8e94863 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -2052,7 +2052,8 @@ define void @foo4(double* %A, double* %B, i32* %trigger)  {
 ; AVX512-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0
 ; AVX512-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL]], !llvm.loop !50
 ; AVX512:       for.body.prol.loopexit:
-; AVX512-NEXT:    [[TMP28:%.*]] = icmp ult i64 [[TMP24]], 48
+; AVX512-NEXT:    [[DOTMASK:%.*]] = and i64 [[TMP24]], 9984
+; AVX512-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[DOTMASK]], 0
 ; AVX512-NEXT:    br i1 [[TMP28]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
 ; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_INC_3:%.*]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL_LOOPEXIT]] ]
-- 
2.7.4