[X86][SSE] combineX86ShuffleChain - combine INSERT_VECTOR_ELT patterns to INSERTPS

author Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 12 Jun 2020 10:30:00 +0000 (11:30 +0100)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 12 Jun 2020 10:59:01 +0000 (11:59 +0100)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 12 Jun 2020 10:30:00 +0000 (11:30 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 12 Jun 2020 10:59:01 +0000 (11:59 +0100)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index b4299d5..7db5770 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34444,6 +34444,25 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
      }
    }
  
+  // Attempt to combine to INSERTPS, but only if the inserted element has come
+  // from a scalar.
+  // TODO: Handle other insertions here as well?
+  if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
+      MaskEltSizeInBits == 32 && Subtarget.hasSSE41() &&
+      !isTargetShuffleEquivalent(Mask, {4, 1, 2, 3})) {
+    SDValue SrcV1 = V1, SrcV2 = V2;
+    if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, DAG) &&
+        SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+      if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
+        return SDValue(); // Nothing to do!
+      Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
+                        DAG.getBitcast(MVT::v4f32, SrcV1),
+                        DAG.getBitcast(MVT::v4f32, SrcV2),
+                        DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
+      return DAG.getBitcast(RootVT, Res);
+    }
+  }
+
    SDValue NewV1 = V1; // Save operands in case early exit happens.
    SDValue NewV2 = V2;
    if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll

index b80f6fa..84ebabc 100644 (file)
--- a/llvm/test/CodeGen/X86/build-vector-128.ll
+++ b/llvm/test/CodeGen/X86/build-vector-128.ll
@@ -540,17 +540,11 @@ define <4 x float> @PR37502(float %x, float %y) {
  ; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
  ; AVX-32-NEXT:    retl
  ;
-; AVX1-64-LABEL: PR37502:
-; AVX1-64:       # %bb.0:
-; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX1-64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX1-64-NEXT:    retq
-;
-; AVX2-64-LABEL: PR37502:
-; AVX2-64:       # %bb.0:
-; AVX2-64-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX2-64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX2-64-NEXT:    retq
+; AVX-64-LABEL: PR37502:
+; AVX-64:       # %bb.0:
+; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-64-NEXT:    retq
    %i0 = insertelement <4 x float> undef, float %x, i32 0
    %i1 = insertelement <4 x float> %i0, float %y, i32 1
    %i2 = insertelement <4 x float> %i1, float %x, i32 2
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll

index 4a10a20..f813fec 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -288,7 +288,7 @@ define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
  ;
  ; X64-AVX2-LABEL: buildvector_v4f32_0404:
  ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
  ; X64-AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
  ; X64-AVX2-NEXT:    vmovaps %xmm0, (%rdi)
  ; X64-AVX2-NEXT:    retq
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 12 Jun 2020 10:30:00 +0000 (11:30 +0100)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 12 Jun 2020 10:59:01 +0000 (11:59 +0100)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/build-vector-128.ll		patch | blob | history
llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll		patch | blob | history