[DagCombiner] Allow shuffles to merge through bitcasts

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 5 Mar 2015 17:14:04 +0000 (17:14 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 5 Mar 2015 17:14:04 +0000 (17:14 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 5 Mar 2015 17:14:04 +0000 (17:14 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 5 Mar 2015 17:14:04 +0000 (17:14 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 4a074a0..4cf3434 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11877,6 +11877,89 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        return V;
    }
  
+  // If this shuffle only has a single input that is a bitcasted shuffle,
+  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+  // back to their original types.
+  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+      N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
+      TLI.isTypeLegal(VT)) {
+
+    // Peek through the bitcast only if there is one user.
+    SDValue BC0 = N0;
+    while (BC0.getOpcode() == ISD::BITCAST) {
+      if (!BC0.hasOneUse())
+        break;
+      BC0 = BC0.getOperand(0);
+    }
+
+    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
+      if (Scale == 1)
+        return SmallVector<int, 8>(Mask.begin(), Mask.end());
+
+      SmallVector<int, 8> NewMask;
+      for (int M : Mask)
+        for (int s = 0; s != Scale; ++s)
+          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
+      return NewMask;
+    };
+
+    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+      EVT SVT = VT.getScalarType();
+      EVT InnerVT = BC0->getValueType(0);
+      EVT InnerSVT = InnerVT.getScalarType();
+
+      // Determine which shuffle works with the smaller scalar type.
+      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+      EVT ScaleSVT = ScaleVT.getScalarType();
+
+      if (TLI.isTypeLegal(ScaleVT) &&
+          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+
+        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+        // Scale the shuffle masks to the smaller scalar type.
+        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+        SmallVector<int, 8> InnerMask =
+            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
+        SmallVector<int, 8> OuterMask =
+            ScaleShuffleMask(SVN->getMask(), OuterScale);
+
+        // Merge the shuffle masks.
+        SmallVector<int, 8> NewMask;
+        for (int M : OuterMask)
+          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+        // Test for shuffle mask legality over both commutations.
+        SDValue SV0 = BC0->getOperand(0);
+        SDValue SV1 = BC0->getOperand(1);
+        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+        if (!LegalMask) {
+          for (int i = 0, e = (int)NewMask.size(); i != e; ++i) {
+            int idx = NewMask[i];
+            if (idx < 0)
+              continue;
+            else if (idx < e)
+              NewMask[i] = idx + e;
+            else
+              NewMask[i] = idx - e;
+          }
+          std::swap(SV0, SV1);
+          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+        }
+
+        if (LegalMask) {
+          SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
+          SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
+          return DAG.getNode(
+              ISD::BITCAST, SDLoc(N), VT,
+              DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+        }
+      }
+    }
+  }
+
    // Canonicalize shuffles according to rules:
    //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
    //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
diff --git a/llvm/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll b/llvm/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll

deleted file mode 100644 (file)

index 614ccda..0000000
--- a/llvm/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32 | FileCheck %s
-
-; CHECK: test
-; CHECK: vpmovzxwd
-; CHECK: vpmovzxwd
-define void @test(<4 x i64> %a, <4 x i16>* %buf) {
-  %ex1 = extractelement <4 x i64> %a, i32 0
-  %ex2 = extractelement <4 x i64> %a, i32 1
-  %x1 = bitcast i64 %ex1 to <4 x i16>
-  %x2 = bitcast i64 %ex2 to <4 x i16>
-  %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  store <4 x i16> %Sh, <4 x i16>* %buf, align 1
-  ret void
-}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll

index c271622..01b8972 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1336,3 +1336,22 @@ define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(
    %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
    ret <16 x i8> %shuffle
  }
+
+define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
+; AVX:       # BB#0:
+; AVX-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT:    retq
+  %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
+  %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
+  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
+  %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  %bitcast8  = bitcast <8 x i16> %shuffle16 to <16 x i8>
+  ret <16 x i8> %bitcast8
+}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll

index 30eceac..ee68df5 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -810,6 +810,25 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
    ret <2 x double> %shuffle
  }
  
+define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
+; SSE-LABEL: shuffle_v2f64_bitcast_1z:
+; SSE:       # BB#0:
+; SSE-NEXT:    xorpd %xmm1, %xmm1
+; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_bitcast_1z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX-NEXT:    retq
+  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
+  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
+  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
+  ret <2 x double> %bitcast64
+}
+
  define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
  ; SSE-LABEL: insert_reg_and_zero_v2i64:
  ; SSE:       # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll

index 2021905..8612a5a 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1574,6 +1574,23 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) {
    ret <4 x i32> %shuffle
  }
  
+define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: shuffle_v4i32_bitcast_0415:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_bitcast_0415:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT:    retq
+  %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
+  %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
+  %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
+  ret <4 x i32> %bitcast32
+}
+
  define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
  ; SSE-LABEL: insert_reg_and_zero_v4i32:
  ; SSE:       # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

index 0ac9a2b..8aca67c 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -922,3 +922,22 @@ define <4 x double> @splat_v4f64(<2 x double> %r) {
    %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
    ret <4 x double> %1
  }
+
+define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: bitcast_v4f64_0426:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: bitcast_v4f64_0426:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpcklqdq  {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2-NEXT:    retq
+  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
+  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
+  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
+  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
+  ret <4 x double> %bitcast64
+}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll

index c585414..094722d 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -8,14 +8,14 @@ define void @test0(<1 x i64>* %x) {
  ; X32:       ## BB#0: ## %entry
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
  ; X32-NEXT:    movlpd %xmm0, (%eax)
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test0:
  ; X64:       ## BB#0: ## %entry
  ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
  ; X64-NEXT:    movq %xmm0, (%rdi)
  ; X64-NEXT:    retq
  entry:
@@ -84,16 +84,15 @@ define void @test2() nounwind {
  ; X32:       ## BB#0: ## %entry
  ; X32-NEXT:    movl L_tmp_V2i$non_lazy_ptr, %eax
  ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT:    movlpd %xmm0, (%eax)
+; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-NEXT:    movlps %xmm0, (%eax)
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test2:
  ; X64:       ## BB#0: ## %entry
  ; X64-NEXT:    movq _tmp_V2i@{{.*}}(%rip), %rax
  ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
  ; X64-NEXT:    movq %xmm0, (%rax)
  ; X64-NEXT:    retq
  entry:
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 5 Mar 2015 17:14:04 +0000 (17:14 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 5 Mar 2015 17:14:04 +0000 (17:14 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll	[deleted file]	patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-mmx.ll		patch \| blob \| history