ShuffleMask.push_back(Element);
}
-void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
- unsigned NumElements = MaskTy->getVectorNumElements();
- if (NumElements == VT.getVectorNumElements()) {
- unsigned EltMaskSize = Log2_64(NumElements * 2);
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i);
- if (!COp) {
- ShuffleMask.clear();
- return;
- }
- if (isa<UndefValue>(COp))
- ShuffleMask.push_back(SM_SentinelUndef);
- else {
- APInt Element = cast<ConstantInt>(COp)->getValue();
- Element = Element.getLoBits(EltMaskSize);
- ShuffleMask.push_back(Element.getZExtValue());
- }
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ (void)MaskTySize;
+ assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ "Unexpected vector size.");
+ assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
+ "Unexpected vector element size.");
+
+ // The shuffle mask requires elements the same size as the target.
+ SmallBitVector UndefElts;
+ SmallVector<uint64_t, 8> RawMask;
+ if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
+ return;
+
+ unsigned NumElts = RawMask.size();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
}
+ int Index = RawMask[i] & (NumElts*2 - 1);
+ ShuffleMask.push_back(Index);
}
}
} // llvm namespace
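
For readers outside the patch, here is a minimal standalone sketch of what the new decode does, assuming extractConstantMask has already flattened the constant into one raw value per ElSize-bit element. The sample values are the second identity mask from the tests below; main() and the printed output are illustrative only, not LLVM code.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Raw 64-bit values, one per mask element, as extractConstantMask
  // would produce for the second identity mask in the tests below.
  std::vector<uint64_t> RawMask = {7, 14, 5, 12, 3, 10, 1, 8};
  unsigned NumElts = RawMask.size();
  std::vector<int> ShuffleMask;
  for (unsigned i = 0; i != NumElts; ++i) {
    // VPERMV3 indexes the 2*NumElts-element concatenation of its two
    // sources; only the low bits of each constant are meaningful.
    int Index = RawMask[i] & (NumElts * 2 - 1);
    ShuffleMask.push_back(Index);
  }
  for (int M : ShuffleMask)
    printf("%d ", M); // prints: 7 14 5 12 3 10 1 8
  printf("\n");
  return 0;
}
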
define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
; X32-LABEL: combine_vpermt2var_8f64_identity:
; X32: # BB#0:
-; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
-; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8f64_identity:
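
Why the X32 body of combine_vpermt2var_8f64_identity collapses to a bare retl: on X32 the 8 x i64 mask constants print as sixteen 32-bit halves, so [7,0,6,0,...] encodes indices 7,6,5,4,3,2,1,0 and [7,0,14,0,...] encodes 7,14,5,12,3,10,1,8. Both data operands of the second vpermt2pd are zmm0, the first shuffle's result, so its indices wrap modulo 8 and the two masks compose to the identity. A hypothetical standalone check of that composition (not LLVM code):

#include <cstdio>

int main() {
  int M0[8] = {7, 6, 5, 4, 3, 2, 1, 0};    // first vpermt2pd mask
  int M1[8] = {7, 14, 5, 12, 3, 10, 1, 8}; // second mask; both inputs are res0
  bool IsIdentity = true;
  for (int i = 0; i != 8; ++i)
    IsIdentity &= (M0[M1[i] & 7] == i); // res1[i] == x0[i] for every lane?
  printf(IsIdentity ? "identity\n" : "not identity\n");
  return 0;
}
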
define <8 x double> @combine_vpermt2var_8f64_movddup(<8 x double> %x0, <8 x double> %x1) {
; X32-LABEL: combine_vpermt2var_8f64_movddup:
; X32: # BB#0:
-; X32-NEXT: vmovapd {{.*#+}} zmm2 = <0,0,0,0,2,0,2,0,4,0,4,0,u,u,u,u>
-; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8f64_movddup:
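
The movddup combine rests on the same decode fix: read as 64-bit elements, the removed constant <0,0,0,0,2,0,2,0,4,0,4,0,u,u,u,u> is the mask [0,0,2,2,4,4,u,u], which is MOVDDUP's duplicate-the-even-lanes pattern, so the variable shuffle becomes the fixed vmovddup above. A hypothetical standalone check of the pattern (not the actual LLVM matcher):

#include <cstdio>

int main() {
  // Decoded from the removed constant; undef (u) lanes shown as -1.
  int Mask[8] = {0, 0, 2, 2, 4, 4, -1, -1};
  bool IsMovddup = true;
  for (int i = 0; i != 8; ++i)
    IsMovddup &= (Mask[i] < 0 || Mask[i] == (i & ~1)); // lane i reads even lane
  printf(IsMovddup ? "movddup\n" : "not movddup\n");
  return 0;
}
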
; X32-LABEL: combine_vpermt2var_8f64_movddup_load:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovapd (%eax), %zmm1
-; X32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
-; X32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; X32-NEXT: vmovapd %zmm1, %zmm0
+; X32-NEXT: vmovddup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8f64_movddup_load:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
-; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0 {%k1} {z}
+; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8f64_movddup_mask:
define <8 x double> @combine_vpermi2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
; X32-LABEL: combine_vpermi2var_8f64_identity:
; X32: # BB#0:
-; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
-; X32-NEXT: vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT: vpermi2pd %zmm2, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_8f64_identity: