return (FstHalf | (SndHalf << 4));
}
// Symmetric in-lane mask. Each lane has 4 elements (for imm8)
static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
- unsigned NumElts = VT.getVectorNumElements();
- if (!(VT.is256BitVector() && NumElts == 4) &&
- !(VT.is512BitVector() && NumElts == 8))
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize < 32)
return false;
+ unsigned NumElts = VT.getVectorNumElements();
Imm8 = 0;
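+ // With at most 4 elements (a 128-bit vector, or a 256-bit vector of
+ // 64-bit elements) every index fits in two bits of the immediate, so
+ // the mask packs directly and no lane handling is needed.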
+ if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ Imm8 |= Mask[i] << (i*2);
+ }
+ return true;
+ }
+
unsigned LaneSize = 4;
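+ // MaskVal[i] records the in-lane index chosen for position i by the
+ // first lane that defines it; the 8-bit immediate is replicated across
+ // all lanes, so every lane must agree on the same index.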
+ SmallVector<int, 4> MaskVal(LaneSize, -1);
+
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
- if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l))
+ if (Mask[i+l] < 0)
+ continue;
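+ // First lane to define this position sets its bits in the immediate.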
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Mask[i+l] - l;
+ Imm8 |= MaskVal[i] << (i*2);
+ continue;
+ }
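+ // Later lanes must repeat the index already recorded for this position.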
+ if (Mask[i+l] != (signed)(MaskVal[i]+l))
return false;
- if (Mask[i+l] >= 0)
- Imm8 |= (Mask[i+l] - l) << (i*2);
}
}
-
return true;
}
if (NumElts != 8 || l == 0)
continue;
// VPERMILPS handling
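+ // Lanes past the first must mirror lane 0, since the VPERMILPS
+ // immediate applies the same permutation to every 128-bit lane.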
- if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l))
+ if (Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
return false;
}
}
ret <16 x i32> %c
}
-; CHECK: test2b:
-; CHECK: vpermd
-; CHECK: ret
-define <16 x i32> @test2b(<16 x i32> %a) nounwind {
- %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
- ret <16 x i32> %c
-}
-
; CHECK: test3:
; CHECK: vpermq
; CHECK: ret