}
}
+ // Attempt to combine to INSERTPS, but only if the inserted element has come
+ // from a scalar (the matched second operand is an ISD::SCALAR_TO_VECTOR).
+ // TODO: Handle other insertions here as well?
+ if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
+ MaskEltSizeInBits == 32 && Subtarget.hasSSE41() &&
+ !isTargetShuffleEquivalent(Mask, {4, 1, 2, 3})) {
+ SDValue SrcV1 = V1, SrcV2 = V2;
+ if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, DAG) &&
+ SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
+ return SDValue(); // Nothing to do!
+ Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
+ DAG.getBitcast(MVT::v4f32, SrcV1),
+ DAG.getBitcast(MVT::v4f32, SrcV2),
+ DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
+ return DAG.getBitcast(RootVT, Res);
+ }
+ }
+
SDValue NewV1 = V1; // Save operands in case early exit happens.
SDValue NewV2 = V2;
if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
; AVX-32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-32-NEXT: retl
;
-; AVX1-64-LABEL: PR37502:
-; AVX1-64: # %bb.0:
-; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX1-64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX1-64-NEXT: retq
-;
-; AVX2-64-LABEL: PR37502:
-; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX2-64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX2-64-NEXT: retq
+; AVX-64-LABEL: PR37502:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-64-NEXT: retq
%i0 = insertelement <4 x float> undef, float %x, i32 0
%i1 = insertelement <4 x float> %i0, float %y, i32 1
%i2 = insertelement <4 x float> %i1, float %x, i32 2
;
; X64-AVX2-LABEL: buildvector_v4f32_0404:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-AVX2-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX2-NEXT: retq