[InstCombine] use m_APInt to allow icmp (and X, Y), C folds for splat constant vectors

author Sanjay Patel <spatel@rotateright.com>

Sun, 28 Aug 2016 18:18:00 +0000 (18:18 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Sun, 28 Aug 2016 18:18:00 +0000 (18:18 +0000)
author Sanjay Patel <spatel@rotateright.com>
Sun, 28 Aug 2016 18:18:00 +0000 (18:18 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Sun, 28 Aug 2016 18:18:00 +0000 (18:18 +0000)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

index b0802a3..81dcbb7 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1610,48 +1610,50 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
    if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
      return I;
  
-  // FIXME: This check restricts all folds under here to scalar types.
-  ConstantInt *RHS = dyn_cast<ConstantInt>(Cmp.getOperand(1));
-  if (!RHS)
-    return nullptr;
-
-  // Try to optimize things like "A[i]&42 == 0" to index computations.
-  if (LoadInst *LI = dyn_cast<LoadInst>(And->getOperand(0))) {
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
-      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+  // TODO: These all require that Y is constant too, so refactor with the above.
+
+  // Try to optimize things like "A[i] & 42 == 0" to index computations.
+  Value *X = And->getOperand(0);
+  Value *Y = And->getOperand(1);
+  if (auto *LI = dyn_cast<LoadInst>(X))
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+      if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
          if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
-            !LI->isVolatile() && isa<ConstantInt>(And->getOperand(1))) {
-          ConstantInt *C = cast<ConstantInt>(And->getOperand(1));
-          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C))
+            !LI->isVolatile() && isa<ConstantInt>(Y)) {
+          ConstantInt *C2 = cast<ConstantInt>(Y);
+          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
              return Res;
          }
-  }
+
+  if (!Cmp.isEquality())
+    return nullptr;
  
    // X & -C == -C -> X >  u ~C
    // X & -C != -C -> X <= u ~C
    //   iff C is a power of 2
-  if (Cmp.isEquality() && RHS == And->getOperand(1) && (-(*C)).isPowerOf2())
-    return new ICmpInst(Cmp.getPredicate() == ICmpInst::ICMP_EQ
-                            ? ICmpInst::ICMP_UGT
-                            : ICmpInst::ICMP_ULE,
-                        And->getOperand(0), SubOne(RHS));
+  if (Cmp.getOperand(1) == Y && (-(*C)).isPowerOf2()) {
+    auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
+                                                          : CmpInst::ICMP_ULE;
+    return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
+  }
  
-  // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1)
-  //   iff C is a power of 2
-  if (Cmp.isEquality() && And->hasOneUse() && match(RHS, m_Zero())) {
-    if (auto *CI = dyn_cast<ConstantInt>(And->getOperand(1))) {
-      const APInt &AI = CI->getValue();
-      int32_t ExactLogBase2 = AI.exactLogBase2();
-      if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
-        Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
-        Value *Trunc = Builder->CreateTrunc(And->getOperand(0), NTy);
-        return new ICmpInst(Cmp.getPredicate() == ICmpInst::ICMP_EQ
-                                ? ICmpInst::ICMP_SGE
-                                : ICmpInst::ICMP_SLT,
-                            Trunc, Constant::getNullValue(NTy));
-      }
+  // (X & C2) == 0 -> (trunc X) >= 0
+  // (X & C2) != 0 -> (trunc X) <  0
+  //   iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
+  const APInt *C2;
+  if (And->hasOneUse() && *C == 0 && match(Y, m_APInt(C2))) {
+    int32_t ExactLogBase2 = C2->exactLogBase2();
+    if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
+      Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
+      if (And->getType()->isVectorTy())
+        NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
+      Value *Trunc = Builder->CreateTrunc(X, NTy);
+      auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
+                                                            : CmpInst::ICMP_SLT;
+      return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
      }
    }
+
    return nullptr;
  }
  
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll

index ce6894f..d4aa1c5 100644 (file)
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -42,11 +42,10 @@ define i1 @test2(i64 %A) {
    ret i1 %cmp
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @test2vec(<2 x i64> %A) {
  ; CHECK-LABEL: @test2vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> %A, <i64 128, i64 128>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i64> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
  ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
  ;
    %and = and <2 x i64> %A, <i64 128, i64 128>
@@ -65,11 +64,10 @@ define i1 @test3(i64 %A) {
    ret i1 %cmp
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @test3vec(<2 x i64> %A) {
  ; CHECK-LABEL: @test3vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> %A, <i64 128, i64 128>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i64> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
  ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
  ;
    %and = and <2 x i64> %A, <i64 128, i64 128>
diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll

index 3e1699a..50fbe74 100644 (file)
--- a/llvm/test/Transforms/InstCombine/apint-shift.ll
+++ b/llvm/test/Transforms/InstCombine/apint-shift.ll
@@ -280,11 +280,9 @@ define i1 @test19a(i39 %A) {
    ret i1 %C
  }
  
-; FIXME: Vectors should fold too.
  define <2 x i1> @test19a_vec(<2 x i39> %A) {
  ; CHECK-LABEL: @test19a_vec(
-; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i39> %A, <i39 -4, i39 -4>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i39> [[B_MASK]], <i39 -4, i39 -4>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i39> %A, <i39 -5, i39 -5>
  ; CHECK-NEXT:    ret <2 x i1> [[C]]
  ;
    %B = ashr <2 x i39> %A, <i39 2, i39 2>
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll

index a8d55b1..93e0cdf 100644 (file)
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -1815,11 +1815,9 @@ define i1 @icmp_and_X_-16_eq-16(i32 %X) {
    ret i1 %cmp
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @icmp_and_X_-16_eq-16_vec(<2 x i32> %X) {
  ; CHECK-LABEL: @icmp_and_X_-16_eq-16_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> %X, <i32 -16, i32 -16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], <i32 -16, i32 -16>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> %X, <i32 -17, i32 -17>
  ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
  ;
    %and = and <2 x i32> %X, <i32 -16, i32 -16>
@@ -1837,11 +1835,9 @@ define i1 @icmp_and_X_-16_ne-16(i32 %X) {
    ret i1 %cmp
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @icmp_and_X_-16_ne-16_vec(<2 x i32> %X) {
  ; CHECK-LABEL: @icmp_and_X_-16_ne-16_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> %X, <i32 -16, i32 -16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], <i32 -16, i32 -16>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> %X, <i32 -16, i32 -16>
  ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
  ;
    %and = and <2 x i32> %X, <i32 -16, i32 -16>
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll

index a91038f..4629506 100644 (file)
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -373,11 +373,9 @@ define i1 @test19a(i32 %A) {
    ret i1 %C
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @test19a_vec(<2 x i32> %A) {
  ; CHECK-LABEL: @test19a_vec(
-; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i32> %A, <i32 -4, i32 -4>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], <i32 -4, i32 -4>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> %A, <i32 -5, i32 -5>
  ; CHECK-NEXT:    ret <2 x i1> [[C]]
  ;
    %B = ashr <2 x i32> %A, <i32 2, i32 2>
author	Sanjay Patel <spatel@rotateright.com>
	Sun, 28 Aug 2016 18:18:00 +0000 (18:18 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Sun, 28 Aug 2016 18:18:00 +0000 (18:18 +0000)
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp		patch \| blob \| history
llvm/test/Transforms/InstCombine/and-compare.ll		patch \| blob \| history
llvm/test/Transforms/InstCombine/apint-shift.ll		patch \| blob \| history
llvm/test/Transforms/InstCombine/icmp.ll		patch \| blob \| history
llvm/test/Transforms/InstCombine/shift.ll		patch \| blob \| history