From 5c5311f4e50828ae1e2ad597c5d715c2dedd836a Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 28 Aug 2016 18:18:00 +0000
Subject: [PATCH] [InstCombine] use m_APInt to allow icmp (and X, Y), C folds
 for splat constant vectors

llvm-svn: 279937
---
 .../Transforms/InstCombine/InstCombineCompares.cpp | 66 +++++++++++-----------
 llvm/test/Transforms/InstCombine/and-compare.ll    | 10 ++--
 llvm/test/Transforms/InstCombine/apint-shift.ll    |  4 +-
 llvm/test/Transforms/InstCombine/icmp.ll           |  8 +--
 llvm/test/Transforms/InstCombine/shift.ll          |  4 +-
 5 files changed, 42 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b0802a3..81dcbb7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1610,48 +1610,50 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
   if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
     return I;
 
-  // FIXME: This check restricts all folds under here to scalar types.
-  ConstantInt *RHS = dyn_cast<ConstantInt>(Cmp.getOperand(1));
-  if (!RHS)
-    return nullptr;
-
-  // Try to optimize things like "A[i]&42 == 0" to index computations.
-  if (LoadInst *LI = dyn_cast<LoadInst>(And->getOperand(0))) {
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
-      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+  // TODO: These all require that Y is constant too, so refactor with the above.
+
+  // Try to optimize things like "A[i] & 42 == 0" to index computations.
+  Value *X = And->getOperand(0);
+  Value *Y = And->getOperand(1);
+  if (auto *LI = dyn_cast<LoadInst>(X))
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+      if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
         if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
-            !LI->isVolatile() && isa<ConstantInt>(And->getOperand(1))) {
-          ConstantInt *C = cast<ConstantInt>(And->getOperand(1));
-          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C))
+            !LI->isVolatile() && isa<ConstantInt>(Y)) {
+          ConstantInt *C2 = cast<ConstantInt>(Y);
+          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
             return Res;
         }
-  }
+
+  if (!Cmp.isEquality())
+    return nullptr;
 
   // X & -C == -C -> X >  u ~C
   // X & -C != -C -> X <= u ~C
   //   iff C is a power of 2
-  if (Cmp.isEquality() && RHS == And->getOperand(1) && (-(*C)).isPowerOf2())
-    return new ICmpInst(Cmp.getPredicate() == ICmpInst::ICMP_EQ
-                            ? ICmpInst::ICMP_UGT
-                            : ICmpInst::ICMP_ULE,
-                        And->getOperand(0), SubOne(RHS));
+  if (Cmp.getOperand(1) == Y && (-(*C)).isPowerOf2()) {
+    auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
+                                                          : CmpInst::ICMP_ULE;
+    return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
+  }
 
-  // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1)
-  //   iff C is a power of 2
-  if (Cmp.isEquality() && And->hasOneUse() && match(RHS, m_Zero())) {
-    if (auto *CI = dyn_cast<ConstantInt>(And->getOperand(1))) {
-      const APInt &AI = CI->getValue();
-      int32_t ExactLogBase2 = AI.exactLogBase2();
-      if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
-        Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
-        Value *Trunc = Builder->CreateTrunc(And->getOperand(0), NTy);
-        return new ICmpInst(Cmp.getPredicate() == ICmpInst::ICMP_EQ
-                                ? ICmpInst::ICMP_SGE
-                                : ICmpInst::ICMP_SLT,
-                            Trunc, Constant::getNullValue(NTy));
-      }
+  // (X & C2) == 0 -> (trunc X) >= 0
+  // (X & C2) != 0 -> (trunc X) <  0
+  //   iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
+  const APInt *C2;
+  if (And->hasOneUse() && *C == 0 && match(Y, m_APInt(C2))) {
+    int32_t ExactLogBase2 = C2->exactLogBase2();
+    if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
+      Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
+      if (And->getType()->isVectorTy())
+        NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
+      Value *Trunc = Builder->CreateTrunc(X, NTy);
+      auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
+                                                            : CmpInst::ICMP_SLT;
+      return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
     }
   }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
index ce6894f..d4aa1c5 100644
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -42,11 +42,10 @@ define i1 @test2(i64 %A) {
   ret i1 %cmp
 }
 
-; FIXME: Vectors should fold the same way.
 define <2 x i1> @test2vec(<2 x i64> %A) {
 ; CHECK-LABEL: @test2vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> %A, <i64 128, i64 128>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i64> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %and = and <2 x i64> %A, <i64 128, i64 128>
@@ -65,11 +64,10 @@ define i1 @test3(i64 %A) {
   ret i1 %cmp
 }
 
-; FIXME: Vectors should fold the same way.
 define <2 x i1> @test3vec(<2 x i64> %A) {
 ; CHECK-LABEL: @test3vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> %A, <i64 128, i64 128>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i64> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %and = and <2 x i64> %A, <i64 128, i64 128>
diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll
index 3e1699a..50fbe74 100644
--- a/llvm/test/Transforms/InstCombine/apint-shift.ll
+++ b/llvm/test/Transforms/InstCombine/apint-shift.ll
@@ -280,11 +280,9 @@ define i1 @test19a(i39 %A) {
   ret i1 %C
 }
 
-; FIXME: Vectors should fold too.
 define <2 x i1> @test19a_vec(<2 x i39> %A) {
 ; CHECK-LABEL: @test19a_vec(
-; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i39> %A, <i39 -4, i39 -4>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i39> [[B_MASK]], <i39 -4, i39 -4>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i39> %A, <i39 -5, i39 -5>
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i39> %A, <i39 2, i39 2>
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index a8d55b1..93e0cdf 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -1815,11 +1815,9 @@ define i1 @icmp_and_X_-16_eq-16(i32 %X) {
   ret i1 %cmp
 }
 
-; FIXME: Vectors should fold the same way.
 define <2 x i1> @icmp_and_X_-16_eq-16_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_and_X_-16_eq-16_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> %X, <i32 -16, i32 -16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], <i32 -16, i32 -16>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> %X, <i32 -17, i32 -17>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %and = and <2 x i32> %X, <i32 -16, i32 -16>
@@ -1837,11 +1835,9 @@ define i1 @icmp_and_X_-16_ne-16(i32 %X) {
   ret i1 %cmp
 }
 
-; FIXME: Vectors should fold the same way.
 define <2 x i1> @icmp_and_X_-16_ne-16_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_and_X_-16_ne-16_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> %X, <i32 -16, i32 -16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], <i32 -16, i32 -16>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> %X, <i32 -16, i32 -16>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %and = and <2 x i32> %X, <i32 -16, i32 -16>
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index a91038f..4629506 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -373,11 +373,9 @@ define i1 @test19a(i32 %A) {
   ret i1 %C
 }
 
-; FIXME: Vectors should fold the same way.
 define <2 x i1> @test19a_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test19a_vec(
-; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i32> %A, <i32 -4, i32 -4>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], <i32 -4, i32 -4>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> %A, <i32 -5, i32 -5>
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i32> %A, <i32 2, i32 2>
-- 
2.7.4