[X86][AVX] Rename + cleanup lowerShuffleAsLanePermuteAndBlend. NFCI.

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 1 Sep 2019 16:04:28 +0000 (16:04 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 1 Sep 2019 16:04:28 +0000 (16:04 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 1 Sep 2019 16:04:28 +0000 (16:04 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 1 Sep 2019 16:04:28 +0000 (16:04 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 2fba796..0dd84ab 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14673,16 +14673,14 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
    return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
  }
  
-/// Lower a vector shuffle crossing multiple 128-bit lanes as
-/// a permutation and blend of those lanes.
+/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
+/// source with a lane permutation.
  ///
-/// This essentially blends the out-of-lane inputs to each lane into the lane
-/// from a permuted copy of the vector. This lowering strategy results in four
-/// instructions in the worst case for a single-input cross lane shuffle which
-/// is lower than any other fully general cross-lane shuffle strategy I'm aware
-/// of. Special cases for each particular shuffle pattern should be handled
-/// prior to trying this lowering.
-static SDValue lowerShuffleAsLanePermuteAndBlend(
+/// This lowering strategy results in four instructions in the worst case for a
+/// single-input cross lane shuffle which is lower than any other fully general
+/// cross-lane shuffle strategy I'm aware of. Special cases for each particular
+/// shuffle pattern should be handled prior to trying this lowering.
+static SDValue lowerShuffleAsLanePermuteAndShuffle(
      const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
      SelectionDAG &DAG, const X86Subtarget &Subtarget) {
    // FIXME: This should probably be generalized for 512-bit vectors as well.
@@ -14709,24 +14707,28 @@ static SDValue lowerShuffleAsLanePermuteAndBlend(
        return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
    }
  
+  // TODO - we could support shuffling V2 in the Flipped input.
    assert(V2.isUndef() &&
           "This last part of this routine only works on single input shuffles");
  
-  SmallVector<int, 32> FlippedBlendMask(Size);
-  for (int i = 0; i < Size; ++i)
-    FlippedBlendMask[i] =
-        Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize)
-                                ? Mask[i]
-                                : Mask[i] % LaneSize +
-                                      (i / LaneSize) * LaneSize + Size);
+  SmallVector<int, 32> InLaneMask(Mask.begin(), Mask.end());
+  for (int i = 0; i < Size; ++i) {
+    int &M = InLaneMask[i];
+    if (M < 0)
+      continue;
+    if (((M % Size) / LaneSize) != (i / LaneSize))
+      M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
+  }
+  assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
+         "In-lane shuffle mask expected");
  
-  // Flip the vector, and blend the results which should now be in-lane.
+  // Flip the lanes, and shuffle the results which should now be in-lane.
    MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
    SDValue Flipped = DAG.getBitcast(PVT, V1);
-  Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT),
-                                 { 2, 3, 0, 1 });
+  Flipped =
+      DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1});
    Flipped = DAG.getBitcast(VT, Flipped);
-  return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
+  return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
  }
  
  /// Handle lowering 2-lane 128-bit shuffles.
@@ -15487,8 +15489,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
        return V;
  
      // Otherwise, fall back.
-    return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG,
-                                             Subtarget);
+    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask,
+                                               DAG, Subtarget);
    }
  
    // Use dedicated unpack instructions for masks that match their pattern.
@@ -15704,8 +15706,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
        return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
  
      // Otherwise, fall back.
-    return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
-                                             DAG, Subtarget);
+    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask,
+                                               DAG, Subtarget);
    }
  
    // Try to simplify this by merging 128-bit lanes to enable a lane-based
@@ -15912,8 +15914,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
          return V;
  
-      return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask,
-                                               DAG, Subtarget);
+      return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask,
+                                                 DAG, Subtarget);
      }
  
      SmallVector<int, 8> RepeatedMask;
@@ -16011,8 +16013,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
              DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
        return V;
  
-    return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG,
-                                             Subtarget);
+    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
+                                               DAG, Subtarget);
    }
  
    if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 1 Sep 2019 16:04:28 +0000 (16:04 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 1 Sep 2019 16:04:28 +0000 (16:04 +0000)