From 9de0a3da6a76030f96a2d6793ca4f094fa538db5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 7 Sep 2020 16:11:40 +0100
Subject: [PATCH] [X86][SSE] Don't use LowerVSETCCWithSUBUS for unsigned
 compare with +ve operands (PR47448)

We already simplify the unsigned comparisons if we've found the operands are
non-negative, but we were still calling LowerVSETCCWithSUBUS which resulted
in the PR47448 regressions.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp      |  6 +++--
 llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 33 ++++++++++------------------
 2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a8a41d9..1395db5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22731,8 +22731,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
   }

   // Try to use SUBUS and PCMPEQ.
-  if (SDValue V = LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
-    return V;
+  if (FlipSigns)
+    if (SDValue V =
+            LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
+      return V;

   // We are handling one of the integer comparisons here. Since SSE only has
   // GT and EQ comparisons for integer, swapping operands and multiple
diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
index c770729..d6849a9 100644
--- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -285,9 +285,9 @@ define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    psrlw $1, %xmm1
-; SSE2-NEXT:    psubusw %xmm0, %xmm1
-; SSE2-NEXT:    pxor %xmm0, %xmm0
-; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: uge_v8i16:
@@ -316,9 +316,9 @@ define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    psrlw $1, %xmm1
-; SSE2-NEXT:    psubusw %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ule_v8i16:
@@ -459,9 +459,9 @@ define <8 x i16> @PR47448_uge(i16 signext %0) {
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
-; SSE2-NEXT:    psubusw %xmm0, %xmm1
-; SSE2-NEXT:    pxor %xmm0, %xmm0
-; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: PR47448_uge:
@@ -508,10 +508,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) {
 ; SSE-NEXT:    movd %edi, %xmm0
 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE-NEXT:    psubusw %xmm0, %xmm1
-; SSE-NEXT:    pxor %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: PR47448_ugt:
@@ -520,10 +517,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) {
 ; AVX1-NEXT:    vmovd %edi, %xmm0
 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT:    vpsubusw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: PR47448_ugt:
@@ -531,10 +525,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) {
 ; AVX2-NEXT:    andl $7, %edi
 ; AVX2-NEXT:    vmovd %edi, %xmm0
 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX2-NEXT:    vpsubusw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    retq
   %2 = and i16 %0, 7
   %3 = insertelement <8 x i16> undef, i16 %2, i32 0
-- 
2.7.4
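Illustrative note (not part of the patch): the PR47448 cases reduce to an unsigned
vector compare whose operands are both provably non-negative. A minimal LLVM IR
sketch of that shape follows; it is reconstructed from the FileCheck lines above,
and the function name and exact constants are illustrative rather than the verbatim
test body:

  define <8 x i16> @ugt_nonneg_splat(i16 signext %a) {
    ; Every lane of the splat is (%a & 7), i.e. in [0,7], and the constant vector
    ; is also in [0,7], so the sign bit of both operands is known to be zero.
    %masked = and i16 %a, 7
    %ins = insertelement <8 x i16> undef, i16 %masked, i32 0
    %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
    ; With non-negative operands, "icmp ugt" is equivalent to "icmp sgt", so the
    ; compare can lower to a single PCMPGTW instead of the PSUBUSW+PCMPEQW
    ; sequence removed in the diff above.
    %cmp = icmp ugt <8 x i16> %splat, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
    %ext = sext <8 x i1> %cmp to <8 x i16>
    ret <8 x i16> %ext
  }

As the commit message notes, LowerVSETCC already treats unsigned compares with known
non-negative operands as signed ones (FlipSigns stays false for them), so gating the
LowerVSETCCWithSUBUS call on FlipSigns skips the SUBUS lowering exactly for this
shape and keeps the single PCMPGTW shown in the updated CHECK lines.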