From 07b1ba54b5403617523b0501adc83c2b91d782ab Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 24 May 2017 22:58:17 +0000
Subject: [PATCH] [InstCombine] use m_APInt to allow icmp-mul-mul vector fold

The swapped operands in the first test is a manifestation of an
inefficiency for vectors that doesn't exist for scalars because
the IRBuilder checks for an all-ones mask for scalars, but not
vectors.

llvm-svn: 303818
---
 .../Transforms/InstCombine/InstCombineCompares.cpp | 23 +++++++++++-----------
 llvm/test/Transforms/InstCombine/icmp.ll           | 10 ++++------
 2 files changed, 16 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 5ca0ed2..a855a0d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3052,26 +3052,27 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
       }
       break;
     }
-    case Instruction::Mul:
+    case Instruction::Mul: {
       if (!I.isEquality())
         break;
 
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
-        // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
-        // Mask = -1 >> count-trailing-zeros(Cst).
-        if (!CI->isZero() && !CI->isOne()) {
-          const APInt &AP = CI->getValue();
-          ConstantInt *Mask = ConstantInt::get(
-              I.getContext(),
-              APInt::getLowBitsSet(AP.getBitWidth(),
-                                   AP.getBitWidth() - AP.countTrailingZeros()));
+      const APInt *C;
+      if (match(BO0->getOperand(1), m_APInt(C))) {
+        // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
+        // Mask = -1 >> count-trailing-zeros(C).
+        if (*C != 0 && *C != 1) {
+          // FIXME: If trailing zeros is 0, don't bother creating Mask.
+          Constant *Mask = ConstantInt::get(
+              BO0->getType(),
+              APInt::getLowBitsSet(C->getBitWidth(),
+                                   C->getBitWidth() - C->countTrailingZeros()));
           Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
           Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
           return new ICmpInst(Pred, And1, And2);
         }
       }
       break;
-
+    }
     case Instruction::UDiv:
     case Instruction::LShr:
       if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 9a952ba..39df422 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -2920,9 +2920,7 @@ define i1 @eq_mul_constants(i32 %x, i32 %y) {
 
 define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @eq_mul_constants_splat(
-; CHECK-NEXT:    [[A:%.*]] = mul <2 x i32> %x, <i32 5, i32 5>
-; CHECK-NEXT:    [[B:%.*]] = mul <2 x i32> %y, <i32 5, i32 5>
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[A]], [[B]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> %y, %x
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %A = mul <2 x i32> %x, <i32 5, i32 5>
@@ -2950,9 +2948,9 @@ define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) {
 
 define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @eq_mul_constants_with_tz_splat(
-; CHECK-NEXT:    [[A:%.*]] = mul <2 x i32> %x, <i32 12, i32 12>
-; CHECK-NEXT:    [[B:%.*]] = mul <2 x i32> %y, <i32 12, i32 12>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[A]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> %x, %y
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %A = mul <2 x i32> %x, <i32 12, i32 12>
-- 
2.7.4