From a083f3caa1356ceaec2c250850928c01008f5f58 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 7 Jun 2022 16:42:15 +0100 Subject: [PATCH] [DAG] combineShuffleOfSplatVal - fold shuffle(splat,undef) -> splat, iff the splat contains no UNDEF elements As noticed on D127115 - we were missing this fold, instead just having the shuffle(shuffle(x,undef,splatmask),undef) fold. We should be able to merge these into one using SelectionDAG::isSplatValue, but we'll need to match the shuffle's undef handling first. This also exposed an issue in SelectionDAG::isSplatValue which was incorrectly propagating the undef mask across a bitcast (it was trying to just bail with an APInt::isSubsetOf if it found any undefs but that was actually the wrong way around so didn't fire for partial undef cases). --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++++++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +--- llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 30 +++++++++++--------------- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 30 +++++++++++--------------- 4 files changed, 32 insertions(+), 39 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c69c3d7..0418f78 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21952,6 +21952,13 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { if (!Shuf->getOperand(1).isUndef()) return SDValue(); + + // If the inner operand is a known splat with no undefs, just return that directly. + // TODO: Create DemandedElts mask from Shuf's mask. + // TODO: Allow undef elements and merge with the shuffle code below. 
+ if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false)) + return Shuf->getOperand(0); + auto *Splat = dyn_cast(Shuf->getOperand(0)); if (!Splat || !Splat->isSplat()) return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6dfdfa7..87c64f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2718,9 +2718,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, SubDemandedElts &= ScaledDemandedElts; if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) return false; - // TODO: Add support for merging sub undef elements. - if (SubDemandedElts.isSubsetOf(SubUndefElts)) - return false; + UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts); } return true; } diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index 2be1d69..d45fc69 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -564,25 +564,19 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm5 -; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3] -; AVX1-NEXT: vpsrlq %xmm6, %xmm4, %xmm7 -; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7] -; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpsrlq %xmm6, %xmm0, %xmm6 -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = 
xmm2[0,1,2,3],xmm6[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpsllq %xmm1, %xmm4, %xmm3 -; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vpsllq %xmm3, %xmm4, %xmm5 +; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3 +; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm2 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v4i64: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 7bab44e..1c00644 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -590,25 +590,19 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpsllq %xmm2, %xmm4, %xmm5 -; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3] -; AVX1-NEXT: vpsllq %xmm6, %xmm4, %xmm7 -; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7] -; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm6 -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2 -; 
AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm3 -; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm5 +; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3 +; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsllq %xmm1, %xmm4, %xmm2 +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v4i64: -- 2.7.4