[InstCombine] Support vector splats in foldSelectICmpAnd.

author Craig Topper <craig.topper@intel.com>

Sat, 5 Aug 2017 20:00:41 +0000 (20:00 +0000)

committer Craig Topper <craig.topper@intel.com>

Sat, 5 Aug 2017 20:00:41 +0000 (20:00 +0000)
author Craig Topper <craig.topper@intel.com>
Sat, 5 Aug 2017 20:00:41 +0000 (20:00 +0000)
committer Craig Topper <craig.topper@intel.com>
Sat, 5 Aug 2017 20:00:41 +0000 (20:00 +0000)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

index b5b8aa3..92f3c1d 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -597,18 +597,24 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
  /// icmp instruction with zero, and we have an 'and' with the non-constant value
  /// and a power of two we can turn the select into a shift on the result of the
  /// 'and'.
-static Value *foldSelectICmpAnd(const SelectInst &SI, const ICmpInst *IC,
+static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC,
                                  APInt TrueVal, APInt FalseVal,
                                  InstCombiner::BuilderTy &Builder) {
-  if (!IC->isEquality() || !SI.getType()->isIntegerTy())
+  assert(SelType->isIntOrIntVectorTy() && "Not an integer select?");
+
+  // If this is a vector select, we need a vector compare.
+  if (SelType->isVectorTy() != IC->getType()->isVectorTy())
+    return nullptr;
+
+  if (!IC->isEquality())
      return nullptr;
  
    if (!match(IC->getOperand(1), m_Zero()))
      return nullptr;
  
-  ConstantInt *AndRHS;
+  const APInt *AndRHS;
    Value *LHS = IC->getOperand(0);
-  if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+  if (!match(LHS, m_And(m_Value(), m_Power2(AndRHS))))
      return nullptr;
  
    // If both select arms are non-zero see if we have a select of the form
@@ -628,28 +634,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, const ICmpInst *IC,
      FalseVal -= Offset;
    }
  
-  // Make sure the mask in the 'and' and one of the select arms is a power of 2.
-  if (!AndRHS->getValue().isPowerOf2() ||
-      (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()))
+  // Make sure one of the select arms is a power of 2.
+  if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2())
      return nullptr;
  
    // Determine which shift is needed to transform result of the 'and' into the
    // desired result.
    const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal;
    unsigned ValZeros = ValC.logBase2();
-  unsigned AndZeros = AndRHS->getValue().logBase2();
+  unsigned AndZeros = AndRHS->logBase2();
  
    // If types don't match we can still convert the select by introducing a zext
    // or a trunc of the 'and'.
    Value *V = LHS;
    if (ValZeros > AndZeros) {
-    V = Builder.CreateZExtOrTrunc(V, SI.getType());
+    V = Builder.CreateZExtOrTrunc(V, SelType);
      V = Builder.CreateShl(V, ValZeros - AndZeros);
    } else if (ValZeros < AndZeros) {
      V = Builder.CreateLShr(V, AndZeros - ValZeros);
-    V = Builder.CreateZExtOrTrunc(V, SI.getType());
+    V = Builder.CreateZExtOrTrunc(V, SelType);
    } else
-    V = Builder.CreateZExtOrTrunc(V, SI.getType());
+    V = Builder.CreateZExtOrTrunc(V, SelType);
  
    // Okay, now we know that everything is set up, we just don't know whether we
    // have a icmp_ne or icmp_eq and whether the true or false val is the zero.
@@ -670,11 +675,14 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
    Value *TrueVal = SI.getTrueValue();
    Value *FalseVal = SI.getFalseValue();
  
-  if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
-    if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal))
-      if (Value *V = foldSelectICmpAnd(SI, ICI, TrueValC->getValue(),
-                                       FalseValC->getValue(), Builder))
+  {
+    const APInt *TrueValC, *FalseValC;
+    if (match(TrueVal, m_APInt(TrueValC)) &&
+        match(FalseVal, m_APInt(FalseValC)))
+      if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, *TrueValC,
+                                       *FalseValC, Builder))
          return replaceInstUsesWith(SI, V);
+  }
  
    if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
      return NewSel;
diff --git a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll

index c92a749..7580fad 100644 (file)
--- a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
+++ b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
@@ -268,6 +268,21 @@ define i32 @test65(i64 %x) {
    ret i32 %3
  }
  
+define <2 x i32> @test65vec(<2 x i64> %x) {
+; CHECK-LABEL: @test65vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 16, i64 16>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], <i64 3, i64 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[TMP3]], <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+;
+  %1 = and <2 x i64> %x, <i64 16, i64 16>
+  %2 = icmp ne <2 x i64> %1, zeroinitializer
+  %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+  ret <2 x i32> %3
+}
+
  define i32 @test66(i64 %x) {
  ; CHECK-LABEL: @test66(
  ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[X:%.*]], 31
@@ -282,6 +297,35 @@ define i32 @test66(i64 %x) {
    ret i32 %3
  }
  
+define <2 x i32> @test66vec(<2 x i64> %x) {
+; CHECK-LABEL: @test66vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 4294967296, i64 4294967296>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], <i64 31, i64 31>
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[TMP3]], <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+;
+  %1 = and <2 x i64> %x, <i64 4294967296, i64 4294967296>
+  %2 = icmp ne <2 x i64> %1, zeroinitializer
+  %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+  ret <2 x i32> %3
+}
+
+; Make sure we don't try to optimize a scalar 'and' with a vector select.
+define <2 x i32> @test66vec_scalar_and(i64 %x) {
+; CHECK-LABEL: @test66vec_scalar_and(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %1 = and i64 %x, 4294967296
+  %2 = icmp ne i64 %1, 0
+  %3 = select i1 %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+  ret <2 x i32> %3
+}
+
  define i32 @test67(i16 %x) {
  ; CHECK-LABEL: @test67(
  ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 %x, 1
@@ -296,6 +340,21 @@ define i32 @test67(i16 %x) {
    ret i32 %3
  }
  
+define <2 x i32> @test67vec(<2 x i16> %x) {
+; CHECK-LABEL: @test67vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 4, i16 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact <2 x i16> [[TMP1]], <i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i16> [[TMP2]], <i16 40, i16 40>
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i16> [[TMP3]], <i16 2, i16 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+;
+  %1 = and <2 x i16> %x, <i16 4, i16 4>
+  %2 = icmp ne <2 x i16> %1, zeroinitializer
+  %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+  ret <2 x i32> %3
+}
+
  define i32 @test68(i32 %x, i32 %y) {
  ; CHECK-LABEL: @test68(
  ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6
author	Craig Topper <craig.topper@intel.com>
	Sat, 5 Aug 2017 20:00:41 +0000 (20:00 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sat, 5 Aug 2017 20:00:41 +0000 (20:00 +0000)
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll		patch | blob | history