From 7709480219f97df6729f5a7af68630774852f874 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 7 Mar 2023 15:38:46 +0000 Subject: [PATCH] [X86] Lower vector umax(x,1) -> sub(x,cmpeq(x,0)) With suitable BooleanContent handling we could move this to TargetLowering::expandIntMINMAX, but this is good enough for X86 Fixes #61225 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 +++++++ llvm/test/CodeGen/X86/umax.ll | 54 +++++++++++---------------------- 2 files changed, 28 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 42559ae..4a59bba 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29839,6 +29839,17 @@ static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget, if (VT == MVT::v32i16 || VT == MVT::v64i8) return splitVectorIntBinary(Op, DAG); + // umax(x,1) --> sub(x,cmpeq(x,0)) + // TODO: Move this to expandIntMINMAX? + if (VT.isVector() && Op.getOpcode() == ISD::UMAX && + llvm::isOneOrOneSplat(Op.getOperand(1), true)) { + SDLoc DL(Op); + SDValue X = DAG.getFreeze(Op.getOperand(0)); + SDValue Zero = getZeroVector(VT, Subtarget, DAG, DL); + return DAG.getNode(ISD::SUB, DL, VT, X, + DAG.getSetCC(DL, VT, X, Zero, ISD::SETEQ)); + } + // Default to expand. return SDValue(); } diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll index 55114f1..1c60157 100644 --- a/llvm/test/CodeGen/X86/umax.ll +++ b/llvm/test/CodeGen/X86/umax.ll @@ -423,26 +423,18 @@ define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { define <2 x i64> @test_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE-LABEL: test_v2i64_1: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] -; SSE-NEXT: pxor %xmm0, %xmm1 -; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2] -; SSE-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE-NEXT: pand %xmm3, %xmm2 -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE-NEXT: por %xmm2, %xmm1 -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] +; SSE-NEXT: pand %xmm1, %xmm2 +; SSE-NEXT: psubq %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i64_1: ; AVX: # %bb.0: -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] -; AVX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; X86-LABEL: test_v2i64_1: @@ -632,12 +624,8 @@ define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { define <4 x i32> @test_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind { ; SSE-LABEL: test_v4i32_1: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; SSE-NEXT: pxor %xmm0, %xmm1 -; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: paddd %xmm1, %xmm0 -; SSE-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm1 ; SSE-NEXT: psubd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -781,20 +769,12 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { define <8 x i32> @test_v8i32_1(<8 x i32> %a, <8 x i32> %b) nounwind { ; SSE-LABEL: test_v8i32_1: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: pxor %xmm2, %xmm2 ; SSE-NEXT: movdqa %xmm0, %xmm3 -; SSE-NEXT: pxor %xmm2, %xmm3 -; SSE-NEXT: movdqa {{.*#+}} xmm4 = [2147483649,2147483649,2147483649,2147483649] -; SSE-NEXT: pcmpgtd %xmm4, %xmm3 -; SSE-NEXT: pand %xmm3, %xmm0 -; SSE-NEXT: paddd %xmm3, %xmm0 -; SSE-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE-NEXT: pcmpeqd %xmm2, %xmm3 ; SSE-NEXT: psubd %xmm3, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm2 -; SSE-NEXT: pcmpgtd %xmm4, %xmm2 -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: paddd %xmm2, %xmm1 -; SSE-NEXT: psubd %xmm3, %xmm1 +; SSE-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v8i32_1: @@ -945,9 +925,9 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { define <8 x i16> @test_v8i16_1(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE-LABEL: test_v8i16_1: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1] -; SSE-NEXT: psubusw %xmm0, %xmm1 -; SSE-NEXT: paddw %xmm1, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE-NEXT: psubw %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i16_1: -- 2.7.4