[X86][SSE] Basic combining of unary target shuffles of binary target shuffles.

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 10 Mar 2016 11:23:51 +0000 (11:23 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 10 Mar 2016 11:23:51 +0000 (11:23 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 10 Mar 2016 11:23:51 +0000 (11:23 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 10 Mar 2016 11:23:51 +0000 (11:23 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index b482371..3ae5136 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5266,8 +5266,7 @@ static bool setTargetShuffleZeroElements(SDValue N,
  /// remaining input indices in case we now have a unary shuffle and adjust the
  /// Op0/Op1 inputs accordingly.
  /// Returns true if the target shuffle mask was decoded.
-static bool resolveTargetShuffleInputs(SDValue Op, bool &IsUnary, SDValue &Op0,
-                                       SDValue &Op1,
+static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1,
                                         SmallVectorImpl<int> &Mask) {
    SmallVector<SDValue, 2> Ops;
    if (!setTargetShuffleZeroElements(Op, Mask, Ops))
@@ -5282,10 +5281,6 @@ static bool resolveTargetShuffleInputs(SDValue Op, bool &IsUnary, SDValue &Op0,
  
    Op0 = Op0InUse ? Ops[0] : SDValue();
    Op1 = Op1InUse ? Ops[1] : SDValue();
-  IsUnary = !(Op0InUse && Op1InUse);
-
-  if (!IsUnary)
-    return true;
  
    // We're only using Op1 - commute the mask and inputs.
    if (!Op0InUse && Op1InUse) {
@@ -24036,14 +24031,9 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
           "Can only combine shuffles of the same vector register size.");
  
    // Extract target shuffle mask and resolve sentinels and inputs.
-  bool IsUnary;
    SDValue Input0, Input1;
    SmallVector<int, 16> OpMask;
-  if (!resolveTargetShuffleInputs(Op, IsUnary, Input0, Input1, OpMask))
-    return false;
-
-  // At the moment we can only combine target shuffle unary cases.
-  if (!IsUnary)
+  if (!resolveTargetShuffleInputs(Op, Input0, Input1, OpMask))
      return false;
  
    assert(VT.getVectorNumElements() == OpMask.size() &&
@@ -24103,8 +24093,24 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
                                                  Subtarget, DAG, SDLoc(Root)));
      return true;
    }
+
+  int MaskSize = Mask.size();
+  bool UseInput0 = std::any_of(Mask.begin(), Mask.end(),
+                  [MaskSize](int Idx) { return 0 <= Idx && Idx < MaskSize; });
+  bool UseInput1 = std::any_of(Mask.begin(), Mask.end(),
+                  [MaskSize](int Idx) { return MaskSize <= Idx; });
+
+  // At the moment we can only combine unary shuffle mask cases.
+  if (UseInput0 && UseInput1)
+    return false;
+  else if (UseInput1) {
+    std::swap(Input0, Input1);
+    ShuffleVectorSDNode::commuteMask(Mask);
+  }
+
    assert(Input0 && "Shuffle with no inputs detected");
  
+  // TODO - generalize this to support any variable mask shuffle.
    HasPSHUFB |= (Op.getOpcode() == X86ISD::PSHUFB);
  
    // See if we can recurse into Input0 (if it's a target shuffle).
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll

index 5d970e1..0397366 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -929,18 +929,14 @@ define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
  ;
  ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
  ; SSE41:       # BB#0:
-; SSE41-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT:    xorps %xmm1, %xmm1
-; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
  ; SSE41-NEXT:    retq
  ;
  ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
  ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
  ; AVX1-NEXT:    retq
  ;
  ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

index aecd45a..f66f0d4 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -63,13 +63,11 @@ define <4 x float> @combine_pshufb_movsldup(<4 x float> %a0) {
  define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
  ; SSE-LABEL: combine_unpckl_arg0_pshufb:
  ; SSE:       # BB#0:
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
  ; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: combine_unpckl_arg0_pshufb:
  ; AVX:       # BB#0:
-; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
  ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
  ; AVX-NEXT:    retq
    %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -80,14 +78,13 @@ define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
  define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
  ; SSE-LABEL: combine_unpckl_arg1_pshufb:
  ; SSE:       # BB#0:
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
+; SSE-NEXT:    movdqa %xmm1, %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: combine_unpckl_arg1_pshufb:
  ; AVX:       # BB#0:
-; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
  ; AVX-NEXT:    retq
    %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1>)
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 10 Mar 2016 11:23:51 +0000 (11:23 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 10 Mar 2016 11:23:51 +0000 (11:23 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll		patch \| blob \| history