From ef44228acba499cba328ad7ce399be21462e584c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 8 Aug 2017 11:03:30 +0000
Subject: [PATCH] [DAGCombiner] Simplify shuffle mask index if the referenced
 input element is UNDEF

Fixes one of the cases in PR34041.

Differential Revision: https://reviews.llvm.org/D36393

llvm-svn: 310344
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 36 +++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/avx-vbroadcast.ll       | 18 +++++---------
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4b57787..573002a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15112,6 +15112,38 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
 }
 
+static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
+                                   SDValue N1, SelectionDAG &DAG) {
+  // TODO - handle cases other than BUILD_VECTOR.
+  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (!BV0 && !BV1)
+    return SDValue();
+
+  EVT VT = SVN->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  bool Changed = false;
+  SmallVector<int, 8> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = SVN->getMaskElt(i);
+    if (BV0 && 0 <= Idx && Idx < (int)NumElts &&
+        BV0->getOperand(Idx).isUndef()) {
+      Changed = true;
+      Idx = -1;
+    } else if (BV1 && Idx >= (int)NumElts &&
+               BV1->getOperand(Idx - NumElts).isUndef()) {
+      Changed = true;
+      Idx = -1;
+    }
+    NewMask.push_back(Idx);
+  }
+  if (Changed)
+    return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+
+  return SDValue();
+}
+
 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
 // or turn a shuffle of a single concat into simpler shuffle then concat.
 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -15461,6 +15493,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   }
 
+  // Simplify shuffle mask if a referenced element is UNDEF.
+  if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
+    return V;
+
   // A shuffle of a single vector that is a splat can always be folded.
   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
     if (N1->isUndef() && N0Shuf->isSplat())
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index ccb77a2..97035ba 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -836,14 +836,12 @@ define <4 x double> @broadcast_shuffle_1000(double* %p) {
 ; X32-LABEL: broadcast_shuffle_1000:
 ; X32:       ## BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    vbroadcastsd (%eax), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle_1000:
 ; X64:       ## BB#0:
-; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; X64-NEXT:    retq
   %1 = load double, double* %p
   %2 = insertelement <2 x double> undef, double %1, i32 0
@@ -855,18 +853,14 @@ define <4 x double> @broadcast_shuffle1032(double* %p) {
 ; X32-LABEL: broadcast_shuffle1032:
 ; X32:       ## BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle1032:
 ; X64:       ## BB#0:
-; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X64-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
   %1 = load double, double* %p
   %2 = insertelement <2 x double> undef, double %1, i32 1
-- 
2.7.4
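Note, not part of the upstream patch: a minimal LLVM IR sketch of the kind of pattern the new combine targets, with made-up function and value names, assuming %v reaches the DAG as a BUILD_VECTOR. Both mask indices that read the second shuffle operand select element 1 of %v, which is never written and is therefore undef, so the combine can rewrite those mask indices to undef (-1); later shuffle combines may then drop the second operand entirely, much as the updated avx-vbroadcast.ll checks collapse to a single vbroadcastsd/vmovddup.

; Hypothetical example in the style of the existing lit tests.
define <4 x float> @shuffle_reads_undef_lane(<4 x float> %a, float %x) {
  ; Only element 0 of %v is defined; element 1 stays undef.
  %v = insertelement <4 x float> undef, float %x, i32 0
  ; Mask indices 5 reference element 1 of %v, i.e. an undef element.
  %s = shufflevector <4 x float> %a, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
  ret <4 x float> %s
}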