[X86][SSE] Consistently use the target shuffle root value type for vector size calculations

author Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 5 Aug 2016 13:02:53 +0000 (13:02 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 5 Aug 2016 13:02:53 +0000 (13:02 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 5 Aug 2016 13:02:53 +0000 (13:02 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 5 Aug 2016 13:02:53 +0000 (13:02 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index f52a1ec..35730c6 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25092,8 +25092,10 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
  
    MVT VT = Input.getSimpleValueType();
    MVT RootVT = Root.getSimpleValueType();
-  SDLoc DL(Root);
+  assert(VT.getSizeInBits() == RootVT.getSizeInBits() &&
+         "Vector size mismatch");
  
+  SDLoc DL(Root);
    SDValue Res;
  
    unsigned NumBaseMaskElts = BaseMask.size();
@@ -25106,6 +25108,8 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
  
    unsigned RootSizeInBits = RootVT.getSizeInBits();
    unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
+  bool FloatDomain = VT.isFloatingPoint() ||
+                     (RootVT.is256BitVector() && !Subtarget.hasAVX2());
  
    // Don't combine if we are a AVX512/EVEX target and the mask element size
    // is different from the root element size - this would prevent writemasks
@@ -25122,12 +25126,11 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
    // TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
  
    // Handle 128-bit lane shuffles of 256-bit vectors.
-  if (VT.is256BitVector() && NumBaseMaskElts == 2 &&
+  if (RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
        !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
      if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
        return false; // Nothing to do!
-    MVT ShuffleVT = (VT.isFloatingPoint() || !Subtarget.hasAVX2() ? MVT::v4f64
-                                                                  : MVT::v4i64);
+    MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
      unsigned PermMask = 0;
      PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
      PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
@@ -25158,9 +25161,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
    unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
  
    // Determine the effective mask value type.
-  bool FloatDomain =
-      (VT.isFloatingPoint() || (VT.is256BitVector() && !Subtarget.hasAVX2())) &&
-      (32 <= MaskEltSizeInBits);
+  FloatDomain &= (32 <= MaskEltSizeInBits);
    MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
                             : MVT::getIntegerVT(MaskEltSizeInBits);
    MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
@@ -25265,11 +25266,11 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
    // instructions, but in practice PSHUFB tends to be *very* fast so we're
    // more aggressive.
    if ((Depth >= 3 || HasVariableMask) &&
-      ((VT.is128BitVector() && Subtarget.hasSSSE3()) ||
-       (VT.is256BitVector() && Subtarget.hasAVX2()) ||
-       (VT.is512BitVector() && Subtarget.hasBWI()))) {
+      ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
+       (RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
+       (RootVT.is512BitVector() && Subtarget.hasBWI()))) {
      SmallVector<SDValue, 16> PSHUFBMask;
-    int NumBytes = VT.getSizeInBits() / 8;
+    int NumBytes = RootVT.getSizeInBits() / 8;
      int Ratio = NumBytes / NumMaskElts;
      for (int i = 0; i < NumBytes; ++i) {
        int M = Mask[i / Ratio];
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 5 Aug 2016 13:02:53 +0000 (13:02 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 5 Aug 2016 13:02:53 +0000 (13:02 +0000)