[X86][SSE] Convert computeZeroableShuffleElements to emit KnownUndef and KnownZero

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 31 Oct 2019 11:21:39 +0000 (11:21 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 31 Oct 2019 11:21:39 +0000 (11:21 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 31 Oct 2019 11:21:39 +0000 (11:21 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 31 Oct 2019 11:21:39 +0000 (11:21 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index ee80d81..13d62dc 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6732,7 +6732,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
    V1 = peekThroughBitcasts(V1);
    V2 = peekThroughBitcasts(V2);
  
-  assert((VT.getSizeInBits() % Mask.size()) == 0 &&
+  assert((VT.getSizeInBits() % Size) == 0 &&
           "Illegal split of shuffle value type");
    unsigned EltSizeInBits = VT.getSizeInBits() / Size;
  
@@ -10423,9 +10423,12 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
  /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
  /// as many lanes with this technique as possible to simplify the remaining
  /// shuffle.
-static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
-                                            SDValue V1, SDValue V2) {
-  APInt Zeroable(Mask.size(), 0);
+static void computeZeroableShuffleElements(ArrayRef<int> Mask,
+                                           SDValue V1, SDValue V2,
+                                           APInt &KnownUndef, APInt &KnownZero) {
+  int Size = Mask.size();
+  KnownUndef = KnownZero = APInt::getNullValue(Size);
+
    V1 = peekThroughBitcasts(V1);
    V2 = peekThroughBitcasts(V2);
  
@@ -10433,14 +10436,18 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
    bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
  
    int VectorSizeInBits = V1.getValueSizeInBits();
-  int ScalarSizeInBits = VectorSizeInBits / Mask.size();
+  int ScalarSizeInBits = VectorSizeInBits / Size;
    assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  
-  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+  for (int i = 0; i < Size; ++i) {
      int M = Mask[i];
      // Handle the easy cases.
-    if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
-      Zeroable.setBit(i);
+    if (M < 0) {
+      KnownUndef.setBit(i);
+      continue;
+    }
+    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
+      KnownZero.setBit(i);
        continue;
      }
  
@@ -10457,20 +10464,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
      if ((Size % V.getNumOperands()) == 0) {
        int Scale = Size / V->getNumOperands();
        SDValue Op = V.getOperand(M / Scale);
-      if (Op.isUndef() || X86::isZeroNode(Op))
-        Zeroable.setBit(i);
+      if (Op.isUndef())
+        KnownUndef.setBit(i);
+      if (X86::isZeroNode(Op))
+        KnownZero.setBit(i);
        else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
          APInt Val = Cst->getAPIntValue();
-        Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
-        Val = Val.getLoBits(ScalarSizeInBits);
+        Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
          if (Val == 0)
-          Zeroable.setBit(i);
+          KnownZero.setBit(i);
        } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
          APInt Val = Cst->getValueAPF().bitcastToAPInt();
-        Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
-        Val = Val.getLoBits(ScalarSizeInBits);
+        Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
          if (Val == 0)
-          Zeroable.setBit(i);
+          KnownZero.setBit(i);
        }
        continue;
      }
@@ -10479,18 +10486,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
      // elements must be UNDEF or ZERO.
      if ((V.getNumOperands() % Size) == 0) {
        int Scale = V->getNumOperands() / Size;
-      bool AllZeroable = true;
+      bool AllUndef = true;
+      bool AllZero = true;
        for (int j = 0; j < Scale; ++j) {
          SDValue Op = V.getOperand((M * Scale) + j);
-        AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
+        AllUndef &= Op.isUndef();
+        AllZero &= X86::isZeroNode(Op);
        }
-      if (AllZeroable)
-        Zeroable.setBit(i);
+      if (AllUndef)
+        KnownUndef.setBit(i);
+      if (AllZero)
+        KnownZero.setBit(i);
        continue;
      }
    }
-
-  return Zeroable;
  }
  
  // The Shuffle result is as follow:
@@ -17077,7 +17086,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
    // We actually see shuffles that are entirely re-arrangements of a set of
    // zero inputs. This mostly happens while decomposing complex shuffles into
    // simple ones. Directly lower these as a buildvector of zeros.
-  APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2);
+  APInt KnownUndef, KnownZero;
+  computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
+
+  APInt Zeroable = KnownUndef | KnownZero;
    if (Zeroable.isAllOnesValue())
      return getZeroVector(VT, Subtarget, DAG, DL);
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 31 Oct 2019 11:21:39 +0000 (11:21 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 31 Oct 2019 11:21:39 +0000 (11:21 +0000)