[X86][SSE4A] Add support for shuffle combining to INSERTQI.

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 6 Jul 2017 15:34:17 +0000 (15:34 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 6 Jul 2017 15:34:17 +0000 (15:34 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 6 Jul 2017 15:34:17 +0000 (15:34 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 6 Jul 2017 15:34:17 +0000 (15:34 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 60bc5a5..30353c8 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27714,6 +27714,22 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                      /*AddTo*/ true);
        return true;
      }
+
+    if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+      if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+        return false; // Nothing to do!
+      V1 = DAG.getBitcast(IntMaskVT, V1);
+      DCI.AddToWorklist(V1.getNode());
+      V2 = DAG.getBitcast(IntMaskVT, V2);
+      DCI.AddToWorklist(V2.getNode());
+      Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
+                        DAG.getConstant(BitLen, DL, MVT::i8),
+                        DAG.getConstant(BitIdx, DL, MVT::i8));
+      DCI.AddToWorklist(Res.getNode());
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                    /*AddTo*/ true);
+      return true;
+    }
    }
  
    // Don't try to re-form single instruction chains under any circumstances now
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll

index a63c77b..af69a5a 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
@@ -75,19 +75,10 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
  }
  
  define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
-; SSE-LABEL: combine_pshufb_insertqi_pshufb:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_pshufb_insertqi_pshufb:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_pshufb_insertqi_pshufb:
+; ALL:       # BB#0:
+; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
+; ALL-NEXT:    retq
    %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
    %2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 6 Jul 2017 15:34:17 +0000 (15:34 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 6 Jul 2017 15:34:17 +0000 (15:34 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll		patch \| blob \| history