From 925093d88ae74560a8e94cf66f95d60ea3ffa2d3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 1 Mar 2021 14:47:24 +0000 Subject: [PATCH] [X86] Fold shuffle(not(x),undef) -> not(shuffle(x,undef)) Move NOT out to expose more AND -> ANDN folds --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +++++++++++++ llvm/test/CodeGen/X86/combine-bitselect.ll | 30 ++++++------------------------ llvm/test/CodeGen/X86/promote-cmp.ll | 11 +++++------ 3 files changed, 24 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 86052fa..2e90222 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38003,6 +38003,19 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG)) return HAddSub; + + // Fold shuffle(not(x),undef) -> not(shuffle(x,undef)). + if (N->getOpcode() == ISD::VECTOR_SHUFFLE && + N->getOperand(0).getOpcode() == ISD::XOR && + N->getOperand(1).isUndef() && + N->isOnlyUserOf(N->getOperand(0).getNode())) { + if (SDValue Not = IsNOT(N->getOperand(0), DAG, true)) { + SDValue NewShuffle = DAG.getVectorShuffle( + VT, dl, DAG.getBitcast(VT, Not), DAG.getUNDEF(VT), + cast<ShuffleVectorSDNode>(N)->getMask()); + return DAG.getNOT(dl, NewShuffle, VT); + } + } } // Attempt to combine into a vector load/broadcast. 
diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll index d57bd87..5c8b8f6 100644 --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -505,26 +505,18 @@ define <4 x i64> @bitselect_v4i64_broadcast_rrr(<4 x i64> %a0, <4 x i64> %a1, i6 ; XOP-LABEL: bitselect_v4i64_broadcast_rrr: ; XOP: # %bb.0: ; XOP-NEXT: vmovq %rdi, %xmm2 -; XOP-NEXT: vmovq %rdi, %xmm3 ; XOP-NEXT: vmovddup {{.*#+}} xmm2 = xmm2[0,0] ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOP-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1] -; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3 -; XOP-NEXT: vandps %ymm2, %ymm0, %ymm0 -; XOP-NEXT: vandnps %ymm1, %ymm3, %ymm1 -; XOP-NEXT: vorps %ymm1, %ymm0, %ymm0 +; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 ; XOP-NEXT: retq ; ; AVX1-LABEL: bitselect_v4i64_broadcast_rrr: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovq %rdi, %xmm2 -; AVX1-NEXT: vmovq %rdi, %xmm3 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = xmm2[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1] -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1 +; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -881,32 +873,22 @@ define <8 x i64> @bitselect_v8i64_broadcast_rrr(<8 x i64> %a0, <8 x i64> %a1, i6 ; XOP-LABEL: bitselect_v8i64_broadcast_rrr: ; XOP: # %bb.0: ; XOP-NEXT: vmovq %rdi, %xmm4 -; XOP-NEXT: vmovq %rdi, %xmm5 ; XOP-NEXT: vmovddup {{.*#+}} xmm4 = xmm4[0,0] ; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4 -; XOP-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1] -; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm5, %ymm5 -; XOP-NEXT: vandps %ymm4, %ymm1, %ymm1 -; XOP-NEXT: vandps %ymm4, %ymm0, %ymm0 -; XOP-NEXT: vandnps %ymm3, %ymm5, %ymm3 -; XOP-NEXT: vorps %ymm3, %ymm1, %ymm1 -; XOP-NEXT: vandnps %ymm2, %ymm5, %ymm2 -; XOP-NEXT: vorps %ymm2, %ymm0, %ymm0 
+; XOP-NEXT: vpcmov %ymm4, %ymm2, %ymm0, %ymm0 +; XOP-NEXT: vpcmov %ymm4, %ymm3, %ymm1, %ymm1 ; XOP-NEXT: retq ; ; AVX1-LABEL: bitselect_v8i64_broadcast_rrr: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovq %rdi, %xmm4 -; AVX1-NEXT: vmovq %rdi, %xmm5 ; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = xmm4[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4 -; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1] -; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm5, %ymm5 ; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 ; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 -; AVX1-NEXT: vandnps %ymm3, %ymm5, %ymm3 +; AVX1-NEXT: vandnps %ymm3, %ymm4, %ymm3 ; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1 -; AVX1-NEXT: vandnps %ymm2, %ymm5, %ymm2 +; AVX1-NEXT: vandnps %ymm2, %ymm4, %ymm2 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll index c59f808..1350b2b 100644 --- a/llvm/test/CodeGen/X86/promote-cmp.ll +++ b/llvm/test/CodeGen/X86/promote-cmp.ll @@ -47,17 +47,16 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) { ; SSE4-LABEL: PR45808: ; SSE4: # %bb.0: ; SSE4-NEXT: movdqa %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm2, %xmm5 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 -; SSE4-NEXT: movdqa %xmm4, %xmm5 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm5 -; SSE4-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] ; SSE4-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE4-NEXT: pxor %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm6, %xmm5 ; SSE4-NEXT: psllq $63, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm3 -; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero -; SSE4-NEXT: psllq $63, %xmm0 +; SSE4-NEXT: psllq $63, %xmm5 +; SSE4-NEXT: movdqa %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm4, %xmm2 ; SSE4-NEXT: movapd %xmm2, %xmm0 ; SSE4-NEXT: movapd %xmm3, %xmm1 -- 2.7.4