Summary: These functions were templated because SelectionDAG uses int masks for shuffles while IR uses unsigned masks. Now that D72467 has landed, we have an int-mask version of IRBuilder::CreateShuffleVector, so we can just use int instead of a template.
Reviewers: spatel, efriedma, RKSimon
Reviewed By: efriedma
Subscribers: hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D77183
///
/// This is the reverse process of "canWidenShuffleElements", but can always
/// succeed.
-template <typename T>
-void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
- SmallVectorImpl<T> &ScaledMask) {
- assert(Scale > 0 && "Unexpected scaling factor");
-
- // Fast-path: if no scaling, then it is just a copy.
- if (Scale == 1) {
- ScaledMask.assign(Mask.begin(), Mask.end());
- return;
- }
-
- ScaledMask.clear();
- for (int MaskElt : Mask)
- for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
- ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
-}
+void scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &ScaledMask);
/// Compute a map of integer instructions to their minimum legal type
/// size.
return false;
}
+/// Replicate each element of Mask 'Scale' times: mask element M expands to
+/// the Scale consecutive elements [Scale*M, Scale*M + Scale). Negative
+/// elements (sentinel values such as undef) are copied through unchanged.
+void llvm::scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &ScaledMask) {
+ assert(Scale > 0 && "Unexpected scaling factor");
+
+ // Fast-path: if no scaling, then it is just a copy.
+ if (Scale == 1) {
+ ScaledMask.assign(Mask.begin(), Mask.end());
+ return;
+ }
+
+ ScaledMask.clear();
+ // Negative (sentinel) elements are propagated as-is; non-negative elements
+ // expand to Scale consecutive sub-elements.
+ for (int MaskElt : Mask)
+ for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
+ ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
+}
+
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {
ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
SmallVector<int, 8> InnerMask;
SmallVector<int, 8> OuterMask;
- scaleShuffleMask<int>(InnerScale, InnerSVN->getMask(), InnerMask);
- scaleShuffleMask<int>(OuterScale, SVN->getMask(), OuterMask);
+ scaleShuffleMask(InnerScale, InnerSVN->getMask(), InnerMask);
+ scaleShuffleMask(OuterScale, SVN->getMask(), OuterMask);
// Merge the shuffle masks.
SmallVector<int, 8> NewMask;
return SDValue();
}
+/// Generate an unpacklo (Lo == true) or unpackhi (Lo == false) shuffle mask
+/// for VT, built independently per 128-bit lane. If Unary, both inputs are
+/// the same vector, so the second-operand offset (NumElts) is not applied.
+void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+ bool Lo, bool Unary) {
+ assert(Mask.empty() && "Expected an empty shuffle mask vector");
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+ for (int i = 0; i < NumElts; ++i) {
+ unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+ // Interleave within the lane: even result elements come from the first
+ // source, odd from the second (or the same source again when Unary).
+ int Pos = (i % NumEltsInLane) / 2 + LaneStart;
+ Pos += (Unary ? 0 : NumElts * (i % 2));
+ Pos += (Lo ? 0 : NumEltsInLane / 2);
+ Mask.push_back(Pos);
+ }
+}
+
+/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
+/// imposed by AVX and specific to the unary pattern. Example:
+/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
+/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
+void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+ bool Lo) {
+ assert(Mask.empty() && "Expected an empty shuffle mask vector");
+ int NumElts = VT.getVectorNumElements();
+ for (int i = 0; i < NumElts; ++i) {
+ // Each source element i/2 appears twice; Hi starts at the vector midpoint.
+ int Pos = i / 2;
+ Pos += (Lo ? 0 : NumElts / 2);
+ Mask.push_back(Pos);
+ }
+}
+
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
- scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
- scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
+ scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
+ scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
for (size_t i = 0; i != MaskSize; ++i) {
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
Mask.push_back(SM_SentinelUndef);
if ((NumSubElts % SubMask.size()) == 0) {
int Scale = NumSubElts / SubMask.size();
SmallVector<int,64> ScaledSubMask;
- scaleShuffleMask<int>(Scale, SubMask, ScaledSubMask);
+ scaleShuffleMask(Scale, SubMask, ScaledSubMask);
SubMask = ScaledSubMask;
} else {
int Scale = SubMask.size() / NumSubElts;
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
SmallVector<int, 4> PSHUFDMask;
- scaleShuffleMask<int>(2, RepeatedMask, PSHUFDMask);
+ scaleShuffleMask(2, RepeatedMask, PSHUFDMask);
return DAG.getBitcast(
MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
SmallVector<int, 2> Widened256Mask;
if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
Widened128Mask.clear();
- llvm::scaleShuffleMask<int>(2, Widened256Mask, Widened128Mask);
+ llvm::scaleShuffleMask(2, Widened256Mask, Widened128Mask);
}
// Try to lower to vshuf64x2/vshuf32x4.
SmallVector<int, 2> Repeated128Mask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
SmallVector<int, 4> PSHUFDMask;
- scaleShuffleMask<int>(2, Repeated128Mask, PSHUFDMask);
+ scaleShuffleMask(2, Repeated128Mask, PSHUFDMask);
return DAG.getBitcast(
MVT::v8i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
// Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
SmallVector<int, 64> Mask;
int Scale = 64 / OutVT.getScalarSizeInBits();
- scaleShuffleMask<int>(Scale, ArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+ scaleShuffleMask(Scale, { 0, 2, 1, 3 }, Mask);
Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
if (DstVT.is256BitVector())
// Narrow the repeated mask to create 32-bit element permutes.
SmallVector<int, 4> WordMask = RepeatedMask;
if (MaskScalarSizeInBits == 64)
- scaleShuffleMask<int>(2, RepeatedMask, WordMask);
+ scaleShuffleMask(2, RepeatedMask, WordMask);
Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
- scaleShuffleMask<int>(MaskScale, BaseMask, Mask);
+ scaleShuffleMask(MaskScale, BaseMask, Mask);
} else {
Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
}
if ((NumSrcElts % Mask.size()) == 0) {
SmallVector<int, 16> ScaledMask;
int Scale = NumSrcElts / Mask.size();
- scaleShuffleMask<int>(Scale, Mask, ScaledMask);
+ scaleShuffleMask(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
// Simplify Mask based on demanded element.
};
/// Generate unpacklo/unpackhi shuffle mask.
- template <typename T = int>
- void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
- bool Unary) {
- assert(Mask.empty() && "Expected an empty shuffle mask vector");
- int NumElts = VT.getVectorNumElements();
- int NumEltsInLane = 128 / VT.getScalarSizeInBits();
- for (int i = 0; i < NumElts; ++i) {
- unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
- int Pos = (i % NumEltsInLane) / 2 + LaneStart;
- Pos += (Unary ? 0 : NumElts * (i % 2));
- Pos += (Lo ? 0 : NumEltsInLane / 2);
- Mask.push_back(Pos);
- }
- }
+ void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+ bool Unary);
/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
- template <typename T = int>
- void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo) {
- assert(Mask.empty() && "Expected an empty shuffle mask vector");
- int NumElts = VT.getVectorNumElements();
- for (int i = 0; i < NumElts; ++i) {
- int Pos = i / 2;
- Pos += (Lo ? 0 : NumElts / 2);
- Mask.push_back(Pos);
- }
- }
+ void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
} // end namespace llvm
MVT VT = MVT::v8i16;
TransposedMatrix.resize(2);
- SmallVector<uint32_t, 16> MaskLow;
- SmallVector<uint32_t, 32> MaskLowTemp1, MaskLowWord;
- SmallVector<uint32_t, 32> MaskHighTemp1, MaskHighWord;
+ SmallVector<int, 16> MaskLow;
+ SmallVector<int, 32> MaskLowTemp1, MaskLowWord;
+ SmallVector<int, 32> MaskHighTemp1, MaskHighWord;
for (unsigned i = 0; i < 8; ++i) {
MaskLow.push_back(i);
MaskLow.push_back(i + 8);
}
- createUnpackShuffleMask<uint32_t>(VT, MaskLowTemp1, true, false);
- createUnpackShuffleMask<uint32_t>(VT, MaskHighTemp1, false, false);
- scaleShuffleMask<uint32_t>(2, MaskHighTemp1, MaskHighWord);
- scaleShuffleMask<uint32_t>(2, MaskLowTemp1, MaskLowWord);
+ createUnpackShuffleMask(VT, MaskLowTemp1, true, false);
+ createUnpackShuffleMask(VT, MaskHighTemp1, false, false);
+ scaleShuffleMask(2, MaskHighTemp1, MaskHighWord);
+ scaleShuffleMask(2, MaskLowTemp1, MaskLowWord);
// IntrVec1Low = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7
// IntrVec2Low = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7
Value *IntrVec1Low =
MVT HalfVT = scaleVectorType(VT);
TransposedMatrix.resize(4);
- SmallVector<uint32_t, 32> MaskHigh;
- SmallVector<uint32_t, 32> MaskLow;
- SmallVector<uint32_t, 32> LowHighMask[2];
- SmallVector<uint32_t, 32> MaskHighTemp;
- SmallVector<uint32_t, 32> MaskLowTemp;
+ SmallVector<int, 32> MaskHigh;
+ SmallVector<int, 32> MaskLow;
+ SmallVector<int, 32> LowHighMask[2];
+ SmallVector<int, 32> MaskHighTemp;
+ SmallVector<int, 32> MaskLowTemp;
// MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86
// shuffle pattern.
- createUnpackShuffleMask<uint32_t>(VT, MaskLow, true, false);
- createUnpackShuffleMask<uint32_t>(VT, MaskHigh, false, false);
+ createUnpackShuffleMask(VT, MaskLow, true, false);
+ createUnpackShuffleMask(VT, MaskHigh, false, false);
// MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86
// shuffle pattern.
- createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp, true, false);
- createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp, false, false);
- scaleShuffleMask<uint32_t>(2, MaskLowTemp, LowHighMask[0]);
- scaleShuffleMask<uint32_t>(2, MaskHighTemp, LowHighMask[1]);
+ createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false);
+ createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false);
+ scaleShuffleMask(2, MaskLowTemp, LowHighMask[0]);
+ scaleShuffleMask(2, MaskHighTemp, LowHighMask[1]);
// IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23
// IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31
TEST_F(BasicTest, scaleShuffleMask) {
SmallVector<int, 16> ScaledMask;
- scaleShuffleMask<int>(1, {3,2,0,-2}, ScaledMask);
- EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({3,2,0,-2}));
- scaleShuffleMask<int>(4, {3,2,0,-1}, ScaledMask);
- EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
+ // scaleShuffleMask is no longer a template; the element type is always int.
+ // Scale == 1 must be a straight copy (negative sentinels preserved).
+ scaleShuffleMask(1, {3,2,0,-2}, ScaledMask);
+ EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({3,2,0,-2}));
+ // Scale == 4 expands element M to [4*M, 4*M+4); -1 stays -1 for all 4 slots.
+ scaleShuffleMask(4, {3,2,0,-1}, ScaledMask);
+ EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
}
TEST_F(BasicTest, getSplatIndex) {