    }
    return true;
  }
+  case X86ISD::VBROADCAST: {
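+    // A broadcast of a vector source splats element 0 of that source into
+    // every lane, so it can be modelled as a shuffle with an all-zero mask.
+    // A broadcast from a scalar has no vector operand to shuffle through.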
+    SDValue Src = N.getOperand(0);
+    MVT SrcVT = Src.getSimpleValueType();
+    if (!SrcVT.isVector())
+      return false;
+
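+    // If the source is narrower than the shuffle width, widen it by
+    // inserting it into the low elements of an undef full-width vector.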
+    if (NumSizeInBits != SrcVT.getSizeInBits()) {
+      assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
+             "Illegal broadcast type");
+      SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+                               NumSizeInBits / SrcVT.getScalarSizeInBits());
+      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
+                        DAG.getUNDEF(SrcVT), Src,
+                        DAG.getIntPtrConstant(0, SDLoc(N)));
+    }
+
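+    // Every element of the faux shuffle reads lane 0 of the widened source.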
+    Ops.push_back(Src);
+    Mask.append(NumElts, 0);
+    return true;
+  }
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND: {
    SDValue Src = N.getOperand(0);
; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask7:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,7]
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,4]
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT: vpbroadcastq %xmm2, %ymm3
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,7]
+; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,4]
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
define <4 x double> @test_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec) {
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
-; CHECK-NEXT: vbroadcastsd %xmm1, %ymm2
-; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,7]
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
+; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,4]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT: vbroadcastsd %xmm3, %ymm3
-; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,7]
+; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT: vbroadcastsd %xmm2, %ymm3
-; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,7]
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4]
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
define <8 x float> @expand5(<4 x float> %a ) {
; SKX64-LABEL: expand5:
; SKX64: # %bb.0:
-; SKX64-NEXT: vbroadcastss %xmm0, %ymm0
+; SKX64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX64-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX64-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX64-NEXT: retq
;
; KNL64-LABEL: expand5:
;
; SKX32-LABEL: expand5:
; SKX32: # %bb.0:
-; SKX32-NEXT: vbroadcastss %xmm0, %ymm0
+; SKX32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX32-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX32-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX32-NEXT: retl
;
; KNL32-LABEL: expand5: