From d76a4d0ac676f387113f082c06754c42a57ad08e Mon Sep 17 00:00:00 2001
From: Zvi Rackover <zvi.rackover@intel.com>
Date: Mon, 3 Apr 2017 17:41:19 +0000
Subject: [PATCH] Revert "[DAGCombine]  A shuffle of a splat is always the
 splat itself"

This reverts commit r299047 which is incorrect because the
simplification may result in incorrect propogation of undefs to users of
the folded shuffle.

Thanks to Andrea Di Biagio for pointing this out.

llvm-svn: 299368
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp       |  6 ------
 llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll | 16 ++++++++++------
 2 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 82c61fa..779a0b5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14633,12 +14633,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   }
 
-  // A shuffle of a splat is always the splat itself:
-  // shuffle (splat-shuffle), undef, M --> splat-shuffle
-  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
-    if (N1.isUndef() && N0Shuf->isSplat())
-      return N0;
-
   // If it is a splat, check if the argument vector is another splat or a
   // build_vector.
   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
diff --git a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
index e7011df..38d45ca 100644
--- a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
+++ b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
@@ -5,8 +5,9 @@
 define <2 x double> @foo2(<2 x double> %v, <2 x double> *%p) nounwind {
 ; AVX2-LABEL: foo2:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
-; AVX2-NEXT:    vmovapd %xmm0, (%rdi)
+; AVX2-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,1]
+; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
+; AVX2-NEXT:    vmovapd %xmm1, (%rdi)
 ; AVX2-NEXT:    retq
   %res = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
   %res1 = shufflevector<2 x double> %res, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
@@ -17,8 +18,9 @@ define <2 x double> @foo2(<2 x double> %v, <2 x double> *%p) nounwind {
 define <4 x double> @foo4(<4 x double> %v, <4 x double> *%p) nounwind {
 ; AVX2-LABEL: foo4:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
-; AVX2-NEXT:    vmovapd %ymm0, (%rdi)
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,2,2]
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm1[2,0,2,3]
+; AVX2-NEXT:    vmovapd %ymm1, (%rdi)
 ; AVX2-NEXT:    retq
   %res = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
   %res1 = shufflevector<4 x double> %res, <4 x double> undef, <4 x i32> <i32 2, i32 0, i32 undef, i32 undef>
@@ -30,8 +32,10 @@ define <8 x float> @foo8(<8 x float> %v, <8 x float> *%p) nounwind {
 ; AVX2-LABEL: foo8:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
-; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
-; AVX2-NEXT:    vmovapd %ymm0, (%rdi)
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,2,2]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = <2,0,u,u,5,1,3,7>
+; AVX2-NEXT:    vpermps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovapd %ymm1, (%rdi)
 ; AVX2-NEXT:    retq
   %res = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   %res1 = shufflevector<8 x float> %res, <8 x float> undef, <8 x i32> <i32 2, i32 0, i32 undef, i32 undef, i32 5, i32 1, i32 3, i32 7>
-- 
2.7.4